commit edccce830a744b7ad84a6eda91277f778da4760f Author: CI Date: Fri Jun 28 12:02:37 2024 +0000 Build branch main with version main (ec3c23e) Build pipeline: vsh-ci-template-v8q2j Source commit: https://github.com/viash-hub/playground/commit/ec3c23e349a796e57e6634e140325afde4515bbf Source message: Update test data script diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f8684f1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.nextflow.log* +.nextflow/ +work +target/ +scm.config +test_data \ No newline at end of file diff --git a/_viash.yaml b/_viash.yaml new file mode 100644 index 0000000..4b90754 --- /dev/null +++ b/_viash.yaml @@ -0,0 +1,13 @@ +name: playground +description: | + A collection of bioinformatics pipelines to illustrate the use of biobox (and biotools). +license: MIT +keywords: [bioinformatics, sequence, high-throughput, mapping, counting, pipeline] +links: + issue_tracker: https://github.com/viash-hub/playground/issues + repository: https://github.com/viash-hub/playground + +viash_version: 0.9.0-RC6 + +config_mods: | + .requirements.commands := ['ps'] diff --git a/main.nf b/main.nf new file mode 100644 index 0000000..5b3d280 --- /dev/null +++ b/main.nf @@ -0,0 +1,3 @@ +workflow { +print("This is a dummy placeholder for pipeline execution. Please use the corresponding nf files for running pipelines.") +} diff --git a/nextflow.config b/nextflow.config new file mode 100644 index 0000000..f8bb790 --- /dev/null +++ b/nextflow.config @@ -0,0 +1 @@ +docker.fixOwnership = true \ No newline at end of file diff --git a/src/mapping_and_qc/config.vsh.yaml b/src/mapping_and_qc/config.vsh.yaml new file mode 100644 index 0000000..3105a0b --- /dev/null +++ b/src/mapping_and_qc/config.vsh.yaml @@ -0,0 +1,55 @@ +name: mapping_and_qc +description: Run STAR and QC +argument_groups: + - name: Input arguments + arguments: + - name: --input_r1 + type: file + direction: input + required: true + - name: --input_r2 + type: file + direction: input + required: true + - name: --reference + type: file + required: true + - name: Output arguments + arguments: + - name: --multiqc_output + type: file + example: multiqc.html + direction: output + required: true + +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + + +dependencies: + - name: cutadapt + repository: bb + - name: pear + repository: bb + - name: falco + repository: bb + - name: multiqc + repository: bb + - name: star/star_align_reads + repository: bb + - name: samtools/samtools_stats + repository: bb + +repositories: + - name: bb + type: vsh + repo: vsh/biobox + tag: v0.1 + +runners: + - type: nextflow + +engines: + - type: native diff --git a/src/mapping_and_qc/main.nf b/src/mapping_and_qc/main.nf new file mode 100644 index 0000000..fdeff20 --- /dev/null +++ b/src/mapping_and_qc/main.nf @@ -0,0 +1,90 @@ +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + | falco.run( + fromState: {id, state -> + def input_list = [state.input_r1, state.input_r2] + [ + "input": input_list, + "outdir": "\$id.falco", + "report_filename": null, + "data_filename": null, + "summary_filename": null, + ] + }, + toState: [ + "output_falco": "outdir", + ] + ) + | cutadapt.run( + fromState: {id, state -> + [ + "input": state.input_r1, + "input_r2": state.input_r2, + "quality_cutoff": "20", // Could make this a parameter + "adapter": "CTGTCTCTTATACACATCT", // Could make this a parameter + "adapter_r2": "CTGTCTCTTATACACATCT", // Could make this a parameter + "output": "*.fastq", + ] + }, + toState: {id, output_state, state -> + def newKeys = [ + "trimmed_r1": output_state["output"][0], + "trimmed_r2": output_state["output"][1], + "output_cutadapt": output_state["output"] + ] + def new_state = state + newKeys + return new_state + } + ) + | pear.run( + fromState: [ + "forward_fastq": "trimmed_r1", + "reverse_fastq": "trimmed_r2", + ], + toState: [ + "output_pear": "assembled", + ] + ) + | star_align_reads.run( + fromState: [ + "input": "output_pear", + "genomeDir": "reference", + ], + toState: [ + "output_star": "aligned_reads", + ] + ) + | samtools_stats.run( + fromState: [ + "input": "output_star", + ], + toState: [ + "output_samtools_stats": "output", + ] + ) + | toSortedList() + | map { events -> + def new_id = "multiqc" + def join_id = events[0][0] + def bam_samtools_stats_dirs = events.collect{it[1].output_samtools_stats.getParent()} + def falco_dirs = events.collect{it[1].output_falco} + return [new_id, ["input": bam_samtools_stats_dirs + falco_dirs, "_meta": ["join_id": join_id]]] + } + | multiqc.run( + fromState: [ + "input": "input", + "output_report": "multiqc_output", + ], + toState: [ + "multiqc_output": "output_report", + ] + ) + | setState(["multiqc_output", "_meta"]) + + emit: + output_ch +} diff --git a/target/.build.yaml b/target/.build.yaml new file mode 100644 index 0000000..e69de29 diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/cutadapt/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/cutadapt/.config.vsh.yaml new file mode 100644 index 0000000..d26efaf --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/cutadapt/.config.vsh.yaml @@ -0,0 +1,733 @@ +name: "cutadapt" +version: "v0.1.0" +argument_groups: +- name: "Specify Adapters for R1" + arguments: + - type: "string" + name: "--adapter" + alternatives: + - "-a" + description: "Sequence of an adapter ligated to the 3' end (paired data:\nof the\ + \ first read). The adapter and subsequent bases are\ntrimmed. If a '$' character\ + \ is appended ('anchoring'), the\nadapter is only found if it is a suffix of\ + \ the read.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--front" + alternatives: + - "-g" + description: "Sequence of an adapter ligated to the 5' end (paired data:\nof the\ + \ first read). The adapter and any preceding bases\nare trimmed. Partial matches\ + \ at the 5' end are allowed. If\na '^' character is prepended ('anchoring'),\ + \ the adapter is\nonly found if it is a prefix of the read.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--anywhere" + alternatives: + - "-b" + description: "Sequence of an adapter that may be ligated to the 5' or 3'\nend\ + \ (paired data: of the first read). Both types of\nmatches as described under\ + \ -a and -g are allowed. If the\nfirst base of the read is part of the match,\ + \ the behavior\nis as with -g, otherwise as with -a. This option is mostly\n\ + for rescuing failed library preparations - do not use if\nyou know which end\ + \ your adapter was ligated to!\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Specify Adapters using Fasta files for R1" + arguments: + - type: "file" + name: "--adapter_fasta" + description: "Fasta file containing sequences of an adapter ligated to the 3'\ + \ end (paired data:\nof the first read). The adapter and subsequent bases are\n\ + trimmed. If a '$' character is appended ('anchoring'), the\nadapter is only\ + \ found if it is a suffix of the read.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--front_fasta" + description: "Fasta file containing sequences of an adapter ligated to the 5'\ + \ end (paired data:\nof the first read). The adapter and any preceding bases\n\ + are trimmed. Partial matches at the 5' end are allowed. If\na '^' character\ + \ is prepended ('anchoring'), the adapter is\nonly found if it is a prefix of\ + \ the read.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--anywhere_fasta" + description: "Fasta file containing sequences of an adapter that may be ligated\ + \ to the 5' or 3'\nend (paired data: of the first read). Both types of\nmatches\ + \ as described under -a and -g are allowed. If the\nfirst base of the read is\ + \ part of the match, the behavior\nis as with -g, otherwise as with -a. This\ + \ option is mostly\nfor rescuing failed library preparations - do not use if\n\ + you know which end your adapter was ligated to!\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Specify Adapters for R2" + arguments: + - type: "string" + name: "--adapter_r2" + alternatives: + - "-A" + description: "Sequence of an adapter ligated to the 3' end (paired data:\nof the\ + \ first read). The adapter and subsequent bases are\ntrimmed. If a '$' character\ + \ is appended ('anchoring'), the\nadapter is only found if it is a suffix of\ + \ the read.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--front_r2" + alternatives: + - "-G" + description: "Sequence of an adapter ligated to the 5' end (paired data:\nof the\ + \ first read). The adapter and any preceding bases\nare trimmed. Partial matches\ + \ at the 5' end are allowed. If\na '^' character is prepended ('anchoring'),\ + \ the adapter is\nonly found if it is a prefix of the read.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--anywhere_r2" + alternatives: + - "-B" + description: "Sequence of an adapter that may be ligated to the 5' or 3'\nend\ + \ (paired data: of the first read). Both types of\nmatches as described under\ + \ -a and -g are allowed. If the\nfirst base of the read is part of the match,\ + \ the behavior\nis as with -g, otherwise as with -a. This option is mostly\n\ + for rescuing failed library preparations - do not use if\nyou know which end\ + \ your adapter was ligated to!\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Specify Adapters using Fasta files for R2" + arguments: + - type: "file" + name: "--adapter_r2_fasta" + description: "Fasta file containing sequences of an adapter ligated to the 3'\ + \ end (paired data:\nof the first read). The adapter and subsequent bases are\n\ + trimmed. If a '$' character is appended ('anchoring'), the\nadapter is only\ + \ found if it is a suffix of the read.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--front_r2_fasta" + description: "Fasta file containing sequences of an adapter ligated to the 5'\ + \ end (paired data:\nof the first read). The adapter and any preceding bases\n\ + are trimmed. Partial matches at the 5' end are allowed. If\na '^' character\ + \ is prepended ('anchoring'), the adapter is\nonly found if it is a prefix of\ + \ the read.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--anywhere_r2_fasta" + description: "Fasta file containing sequences of an adapter that may be ligated\ + \ to the 5' or 3'\nend (paired data: of the first read). Both types of\nmatches\ + \ as described under -a and -g are allowed. If the\nfirst base of the read is\ + \ part of the match, the behavior\nis as with -g, otherwise as with -a. This\ + \ option is mostly\nfor rescuing failed library preparations - do not use if\n\ + you know which end your adapter was ligated to!\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Paired-end options" + arguments: + - type: "boolean_true" + name: "--pair_adapters" + description: "Treat adapters given with -a/-A etc. as pairs. Either both\nor none\ + \ are removed from each read pair.\n" + info: null + direction: "input" + - type: "string" + name: "--pair_filter" + description: "Which of the reads in a paired-end read have to match the\nfiltering\ + \ criterion in order for the pair to be filtered.\n" + info: null + required: false + choices: + - "any" + - "both" + - "first" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--interleaved" + description: "Read and/or write interleaved paired-end reads.\n" + info: null + direction: "input" +- name: "Input parameters" + arguments: + - type: "file" + name: "--input" + description: "Input fastq file for single-end reads or R1 for paired-end reads.\n" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_r2" + description: "Input fastq file for R2 in the case of paired-end reads.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--error_rate" + alternatives: + - "-E" + - "--errors" + description: "Maximum allowed error rate (if 0 <= E < 1), or absolute\nnumber\ + \ of errors for full-length adapter match (if E is an\ninteger >= 1). Error\ + \ rate = no. of errors divided by\nlength of matching region. Default: 0.1 (10%).\n" + info: null + example: + - 0.1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_false" + name: "--no_indels" + description: "Allow only mismatches in alignments.\n" + info: null + direction: "input" + - type: "integer" + name: "--times" + alternatives: + - "-n" + description: "Remove up to COUNT adapters from each read. Default: 1.\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--overlap" + alternatives: + - "-O" + description: "Require MINLENGTH overlap between read and adapter for an\nadapter\ + \ to be found. The default is 3.\n" + info: null + example: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--match_read_wildcards" + description: "Interpret IUPAC wildcards in reads.\n" + info: null + direction: "input" + - type: "boolean_false" + name: "--no_match_adapter_wildcards" + description: "Do not interpret IUPAC wildcards in adapters.\n" + info: null + direction: "input" + - type: "string" + name: "--action" + description: "What to do if a match was found. trim: trim adapter and\nup- or\ + \ downstream sequence; retain: trim, but retain\nadapter; mask: replace with\ + \ 'N' characters; lowercase:\nconvert to lowercase; none: leave unchanged.\n\ + The default is trim.\n" + info: null + example: + - "trim" + required: false + choices: + - "trim" + - "retain" + - "mask" + - "lowercase" + - "none" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--revcomp" + alternatives: + - "--rc" + description: "Check both the read and its reverse complement for adapter\nmatches.\ + \ If match is on reverse-complemented version,\noutput that one.\n" + info: null + direction: "input" +- name: "Read modifications" + arguments: + - type: "integer" + name: "--cut" + alternatives: + - "-u" + description: "Remove LEN bases from each read (or R1 if paired; use --cut_r2\n\ + option for R2). If LEN is positive, remove bases from the\nbeginning. If LEN\ + \ is negative, remove bases from the end.\nCan be used twice if LENs have different\ + \ signs. Applied\n*before* adapter trimming.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--cut_r2" + description: "Remove LEN bases from each read (for R2). If LEN is positive, remove\ + \ bases from the\nbeginning. If LEN is negative, remove bases from the end.\n\ + Can be used twice if LENs have different signs. Applied\n*before* adapter trimming.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--nextseq_trim" + description: "NextSeq-specific quality trimming (each read). Trims also\ndark\ + \ cycles appearing as high-quality G bases.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--quality_cutoff" + alternatives: + - "-q" + description: "Trim low-quality bases from 5' and/or 3' ends of each read\nbefore\ + \ adapter removal. Applied to both reads if data is\npaired. If one value is\ + \ given, only the 3' end is trimmed.\nIf two comma-separated cutoffs are given,\ + \ the 5' end is\ntrimmed with the first cutoff, the 3' end with the second.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--quality_cutoff_r2" + alternatives: + - "-Q" + description: "Quality-trimming cutoff for R2. Default: same as for R1\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--quality_base" + description: "Assume that quality values in FASTQ are encoded as\nascii(quality\ + \ + N). This needs to be set to 64 for some\nold Illumina FASTQ files. The default\ + \ is 33.\n" + info: null + example: + - 33 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--poly_a" + description: "Trim poly-A tails" + info: null + direction: "input" + - type: "integer" + name: "--length" + alternatives: + - "-l" + description: "Shorten reads to LENGTH. Positive values remove bases at\nthe end\ + \ while negative ones remove bases at the beginning.\nThis and the following\ + \ modifications are applied after\nadapter trimming.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--trim_n" + description: "Trim N's on ends of reads." + info: null + direction: "input" + - type: "string" + name: "--length_tag" + description: "Search for TAG followed by a decimal number in the\ndescription\ + \ field of the read. Replace the decimal number\nwith the correct length of\ + \ the trimmed read. For example,\nuse --length-tag 'length=' to correct fields\ + \ like\n'length=123'.\n" + info: null + example: + - "length=" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strip_suffix" + description: "Remove this suffix from read names if present. Can be\ngiven multiple\ + \ times.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--prefix" + alternatives: + - "-x" + description: "Add this prefix to read names. Use {name} to insert the\nname of\ + \ the matching adapter.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--suffix" + alternatives: + - "-y" + description: "Add this suffix to read names; can also include {name}\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--rename" + description: "Rename reads using TEMPLATE containing variables such as\n{id},\ + \ {adapter_name} etc. (see documentation)\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--zero_cap" + alternatives: + - "-z" + description: "Change negative quality values to zero." + info: null + direction: "input" +- name: "Filtering of processed reads" + description: "Filters are applied after above read modifications. Paired-end reads\ + \ are\nalways discarded pairwise (see also --pair_filter).\n" + arguments: + - type: "string" + name: "--minimum_length" + alternatives: + - "-m" + description: "Discard reads shorter than LEN. Default is 0.\nWhen trimming paired-end\ + \ reads, the minimum lengths for R1 and R2 can be specified separately by separating\ + \ them with a colon (:).\nIf the colon syntax is not used, the same minimum\ + \ length applies to both reads, as discussed above.\nAlso, one of the values\ + \ can be omitted to impose no restrictions.\nFor example, with -m 17:, the length\ + \ of R1 must be at least 17, but the length of R2 is ignored.\n" + info: null + example: + - "0" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--maximum_length" + alternatives: + - "-M" + description: "Discard reads longer than LEN. Default: no limit.\nFor paired reads,\ + \ see the remark for --minimum_length\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--max_n" + description: "Discard reads with more than COUNT 'N' bases. If COUNT is\na number\ + \ between 0 and 1, it is interpreted as a fraction\nof the read length.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "long" + name: "--max_expected_errors" + alternatives: + - "--max_ee" + description: "Discard reads whose expected number of errors (computed\nfrom quality\ + \ values) exceeds ERRORS.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "long" + name: "--max_average_error_rate" + alternatives: + - "--max_aer" + description: "as --max_expected_errors (see above), but divided by\nlength to\ + \ account for reads of varying length.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--discard_trimmed" + alternatives: + - "--discard" + description: "Discard reads that contain an adapter. Use also -O to\navoid discarding\ + \ too many randomly matching reads.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--discard_untrimmed" + alternatives: + - "--trimmed_only" + description: "Discard reads that do not contain an adapter.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--discard_casava" + description: "Discard reads that did not pass CASAVA filtering (header\nhas :Y:).\n" + info: null + direction: "input" +- name: "Output parameters" + arguments: + - type: "string" + name: "--report" + description: "Which type of report to print: 'full' (default) or 'minimal'.\n" + info: null + example: + - "full" + required: false + choices: + - "full" + - "minimal" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--json" + description: "Write report in JSON format to this file.\n" + info: null + direction: "input" + - type: "file" + name: "--output" + description: "Glob pattern for matching the expected output files.\nShould include\ + \ `$output_dir`.\n" + info: null + example: + - "fastq/*_001.fast[a,q]" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: true + multiple_sep: ";" + - type: "boolean_true" + name: "--fasta" + description: "Output FASTA to standard output even on FASTQ input.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--info_file" + description: "Write information about each read and its adapter matches\ninto\ + \ info.txt in the output directory.\nSee the documentation for the file format.\n" + info: null + direction: "input" +- name: "Debug" + arguments: + - type: "boolean_true" + name: "--debug" + description: "Print debug information" + info: null + direction: "input" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Cutadapt removes adapter sequences from high-throughput sequencing reads.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +requirements: + commands: + - "ps" +keywords: +- "RNA-seq" +- "scRNA-seq" +- "high-throughput" +license: "MIT" +references: + doi: + - "10.14806/ej.17.1.200" +links: + repository: "https://github.com/marcelm/cutadapt" + homepage: "https://cutadapt.readthedocs.io" + documentation: "https://cutadapt.readthedocs.io" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "python:3.12" + target_registry: "images.viash-hub.com" + target_tag: "v0.1.0" + namespace_separator: "/" + setup: + - type: "python" + user: false + pip: + - "cutadapt" + upgrade: true + - type: "docker" + run: + - "cutadapt --version | sed 's/\\(.*\\)/cutadapt: \"\\1\"/' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/cutadapt/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/cutadapt" + executable: "target/nextflow/cutadapt/main.nf" + viash_version: "0.9.0-RC6" + git_commit: "b84b29747d0635f2ac83ea63b496be9a9edb6724" + git_remote: "https://github.com/viash-hub/biobox" +package_config: + name: "biobox" + version: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null + viash_version: "0.9.0-RC6" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.1.0'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/cutadapt/main.nf b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/cutadapt/main.nf new file mode 100644 index 0000000..8956722 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/cutadapt/main.nf @@ -0,0 +1,4371 @@ +// cutadapt v0.1.0 +// +// This wrapper script is auto-generated by viash 0.9.0-RC6 and is thus a +// derivative work thereof. This software comes with ABSOLUTELY NO WARRANTY from +// Data Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value instanceof String) { + try { + value = value.toInteger() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigInteger) { + value = value.intValue() + } + expectedClass = value instanceof Integer ? null : "Integer" + } else if (par.type == "long") { + // cast to long if need be + if (value instanceof String) { + try { + value = value.toLong() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof Integer) { + value = value.toLong() + } + expectedClass = value instanceof Long ? null : "Long" + } else if (par.type == "double") { + // cast to double if need be + if (value instanceof String) { + try { + value = value.toDouble() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigDecimal) { + value = value.doubleValue() + } + if (value instanceof Float) { + value = value.toDouble() + } + expectedClass = value instanceof Double ? null : "Double" + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value instanceof String) { + def valueLower = value.toLowerCase() + if (valueLower == "true") { + value = true + } else if (valueLower == "false") { + value = false + } + } + expectedClass = value instanceof Boolean ? null : "Boolean" + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required) { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _processOutputValues(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(";") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey,newKey:oldKey'" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{[yamlFile] + outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ +mkdir -p "\$(dirname '${yamlFile}')" +echo "Storing state as yaml" +echo '${yamlBlob}' > '${yamlFile}' +echo "Copying output files to destination folder" +${copyCommands.join("\n ")} +""" +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (key, value) are the tuples that will be saved to the state.yaml file + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = val instanceof File ? val.toPath() : val + [value: value_, inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + def chModifiedFiltered = workflowArgs.filter ? + chModified | filter{workflowArgs.filter(it)} : + chModified + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModifiedFiltered.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModifiedFiltered + chPassthrough = Channel.empty() + } + + def chArgs = workflowArgs.fromState ? + chRun | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRun | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutput = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + // check output tuple + | map { id_, output_ -> + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _processOutputValues(output_, meta.config, id_, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { + output_ = output_.values()[0] + } + + [join_id, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chInitialOutput, chModifiedFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublish = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublish, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + + // remove join_id and meta + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "cutadapt", + "version" : "v0.1.0", + "argument_groups" : [ + { + "name" : "Specify Adapters for R1", + "arguments" : [ + { + "type" : "string", + "name" : "--adapter", + "alternatives" : [ + "-a" + ], + "description" : "Sequence of an adapter ligated to the 3' end (paired data:\nof the first read). The adapter and subsequent bases are\ntrimmed. If a '$' character is appended ('anchoring'), the\nadapter is only found if it is a suffix of the read.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--front", + "alternatives" : [ + "-g" + ], + "description" : "Sequence of an adapter ligated to the 5' end (paired data:\nof the first read). The adapter and any preceding bases\nare trimmed. Partial matches at the 5' end are allowed. If\na '^' character is prepended ('anchoring'), the adapter is\nonly found if it is a prefix of the read.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--anywhere", + "alternatives" : [ + "-b" + ], + "description" : "Sequence of an adapter that may be ligated to the 5' or 3'\nend (paired data: of the first read). Both types of\nmatches as described under -a and -g are allowed. If the\nfirst base of the read is part of the match, the behavior\nis as with -g, otherwise as with -a. This option is mostly\nfor rescuing failed library preparations - do not use if\nyou know which end your adapter was ligated to!\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Specify Adapters using Fasta files for R1", + "arguments" : [ + { + "type" : "file", + "name" : "--adapter_fasta", + "description" : "Fasta file containing sequences of an adapter ligated to the 3' end (paired data:\nof the first read). The adapter and subsequent bases are\ntrimmed. If a '$' character is appended ('anchoring'), the\nadapter is only found if it is a suffix of the read.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--front_fasta", + "description" : "Fasta file containing sequences of an adapter ligated to the 5' end (paired data:\nof the first read). The adapter and any preceding bases\nare trimmed. Partial matches at the 5' end are allowed. If\na '^' character is prepended ('anchoring'), the adapter is\nonly found if it is a prefix of the read.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--anywhere_fasta", + "description" : "Fasta file containing sequences of an adapter that may be ligated to the 5' or 3'\nend (paired data: of the first read). Both types of\nmatches as described under -a and -g are allowed. If the\nfirst base of the read is part of the match, the behavior\nis as with -g, otherwise as with -a. This option is mostly\nfor rescuing failed library preparations - do not use if\nyou know which end your adapter was ligated to!\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Specify Adapters for R2", + "arguments" : [ + { + "type" : "string", + "name" : "--adapter_r2", + "alternatives" : [ + "-A" + ], + "description" : "Sequence of an adapter ligated to the 3' end (paired data:\nof the first read). The adapter and subsequent bases are\ntrimmed. If a '$' character is appended ('anchoring'), the\nadapter is only found if it is a suffix of the read.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--front_r2", + "alternatives" : [ + "-G" + ], + "description" : "Sequence of an adapter ligated to the 5' end (paired data:\nof the first read). The adapter and any preceding bases\nare trimmed. Partial matches at the 5' end are allowed. If\na '^' character is prepended ('anchoring'), the adapter is\nonly found if it is a prefix of the read.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--anywhere_r2", + "alternatives" : [ + "-B" + ], + "description" : "Sequence of an adapter that may be ligated to the 5' or 3'\nend (paired data: of the first read). Both types of\nmatches as described under -a and -g are allowed. If the\nfirst base of the read is part of the match, the behavior\nis as with -g, otherwise as with -a. This option is mostly\nfor rescuing failed library preparations - do not use if\nyou know which end your adapter was ligated to!\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Specify Adapters using Fasta files for R2", + "arguments" : [ + { + "type" : "file", + "name" : "--adapter_r2_fasta", + "description" : "Fasta file containing sequences of an adapter ligated to the 3' end (paired data:\nof the first read). The adapter and subsequent bases are\ntrimmed. If a '$' character is appended ('anchoring'), the\nadapter is only found if it is a suffix of the read.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--front_r2_fasta", + "description" : "Fasta file containing sequences of an adapter ligated to the 5' end (paired data:\nof the first read). The adapter and any preceding bases\nare trimmed. Partial matches at the 5' end are allowed. If\na '^' character is prepended ('anchoring'), the adapter is\nonly found if it is a prefix of the read.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--anywhere_r2_fasta", + "description" : "Fasta file containing sequences of an adapter that may be ligated to the 5' or 3'\nend (paired data: of the first read). Both types of\nmatches as described under -a and -g are allowed. If the\nfirst base of the read is part of the match, the behavior\nis as with -g, otherwise as with -a. This option is mostly\nfor rescuing failed library preparations - do not use if\nyou know which end your adapter was ligated to!\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Paired-end options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--pair_adapters", + "description" : "Treat adapters given with -a/-A etc. as pairs. Either both\nor none are removed from each read pair.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--pair_filter", + "description" : "Which of the reads in a paired-end read have to match the\nfiltering criterion in order for the pair to be filtered.\n", + "required" : false, + "choices" : [ + "any", + "both", + "first" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--interleaved", + "description" : "Read and/or write interleaved paired-end reads.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Input parameters", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input fastq file for single-end reads or R1 for paired-end reads.\n", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input_r2", + "description" : "Input fastq file for R2 in the case of paired-end reads.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--error_rate", + "alternatives" : [ + "-E", + "--errors" + ], + "description" : "Maximum allowed error rate (if 0 <= E < 1), or absolute\nnumber of errors for full-length adapter match (if E is an\ninteger >= 1). Error rate = no. of errors divided by\nlength of matching region. Default: 0.1 (10%).\n", + "example" : [ + 0.1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_false", + "name" : "--no_indels", + "description" : "Allow only mismatches in alignments.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--times", + "alternatives" : [ + "-n" + ], + "description" : "Remove up to COUNT adapters from each read. Default: 1.\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--overlap", + "alternatives" : [ + "-O" + ], + "description" : "Require MINLENGTH overlap between read and adapter for an\nadapter to be found. The default is 3.\n", + "example" : [ + 3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--match_read_wildcards", + "description" : "Interpret IUPAC wildcards in reads.\n", + "direction" : "input" + }, + { + "type" : "boolean_false", + "name" : "--no_match_adapter_wildcards", + "description" : "Do not interpret IUPAC wildcards in adapters.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--action", + "description" : "What to do if a match was found. trim: trim adapter and\nup- or downstream sequence; retain: trim, but retain\nadapter; mask: replace with 'N' characters; lowercase:\nconvert to lowercase; none: leave unchanged.\nThe default is trim.\n", + "example" : [ + "trim" + ], + "required" : false, + "choices" : [ + "trim", + "retain", + "mask", + "lowercase", + "none" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--revcomp", + "alternatives" : [ + "--rc" + ], + "description" : "Check both the read and its reverse complement for adapter\nmatches. If match is on reverse-complemented version,\noutput that one.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Read modifications", + "arguments" : [ + { + "type" : "integer", + "name" : "--cut", + "alternatives" : [ + "-u" + ], + "description" : "Remove LEN bases from each read (or R1 if paired; use --cut_r2\noption for R2). If LEN is positive, remove bases from the\nbeginning. If LEN is negative, remove bases from the end.\nCan be used twice if LENs have different signs. Applied\n*before* adapter trimming.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--cut_r2", + "description" : "Remove LEN bases from each read (for R2). If LEN is positive, remove bases from the\nbeginning. If LEN is negative, remove bases from the end.\nCan be used twice if LENs have different signs. Applied\n*before* adapter trimming.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--nextseq_trim", + "description" : "NextSeq-specific quality trimming (each read). Trims also\ndark cycles appearing as high-quality G bases.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--quality_cutoff", + "alternatives" : [ + "-q" + ], + "description" : "Trim low-quality bases from 5' and/or 3' ends of each read\nbefore adapter removal. Applied to both reads if data is\npaired. If one value is given, only the 3' end is trimmed.\nIf two comma-separated cutoffs are given, the 5' end is\ntrimmed with the first cutoff, the 3' end with the second.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--quality_cutoff_r2", + "alternatives" : [ + "-Q" + ], + "description" : "Quality-trimming cutoff for R2. Default: same as for R1\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--quality_base", + "description" : "Assume that quality values in FASTQ are encoded as\nascii(quality + N). This needs to be set to 64 for some\nold Illumina FASTQ files. The default is 33.\n", + "example" : [ + 33 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--poly_a", + "description" : "Trim poly-A tails", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--length", + "alternatives" : [ + "-l" + ], + "description" : "Shorten reads to LENGTH. Positive values remove bases at\nthe end while negative ones remove bases at the beginning.\nThis and the following modifications are applied after\nadapter trimming.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--trim_n", + "description" : "Trim N's on ends of reads.", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--length_tag", + "description" : "Search for TAG followed by a decimal number in the\ndescription field of the read. Replace the decimal number\nwith the correct length of the trimmed read. For example,\nuse --length-tag 'length=' to correct fields like\n'length=123'.\n", + "example" : [ + "length=" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--strip_suffix", + "description" : "Remove this suffix from read names if present. Can be\ngiven multiple times.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--prefix", + "alternatives" : [ + "-x" + ], + "description" : "Add this prefix to read names. Use {name} to insert the\nname of the matching adapter.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--suffix", + "alternatives" : [ + "-y" + ], + "description" : "Add this suffix to read names; can also include {name}\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--rename", + "description" : "Rename reads using TEMPLATE containing variables such as\n{id}, {adapter_name} etc. (see documentation)\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--zero_cap", + "alternatives" : [ + "-z" + ], + "description" : "Change negative quality values to zero.", + "direction" : "input" + } + ] + }, + { + "name" : "Filtering of processed reads", + "description" : "Filters are applied after above read modifications. Paired-end reads are\nalways discarded pairwise (see also --pair_filter).\n", + "arguments" : [ + { + "type" : "string", + "name" : "--minimum_length", + "alternatives" : [ + "-m" + ], + "description" : "Discard reads shorter than LEN. Default is 0.\nWhen trimming paired-end reads, the minimum lengths for R1 and R2 can be specified separately by separating them with a colon (:).\nIf the colon syntax is not used, the same minimum length applies to both reads, as discussed above.\nAlso, one of the values can be omitted to impose no restrictions.\nFor example, with -m 17:, the length of R1 must be at least 17, but the length of R2 is ignored.\n", + "example" : [ + "0" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--maximum_length", + "alternatives" : [ + "-M" + ], + "description" : "Discard reads longer than LEN. Default: no limit.\nFor paired reads, see the remark for --minimum_length\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--max_n", + "description" : "Discard reads with more than COUNT 'N' bases. If COUNT is\na number between 0 and 1, it is interpreted as a fraction\nof the read length.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "long", + "name" : "--max_expected_errors", + "alternatives" : [ + "--max_ee" + ], + "description" : "Discard reads whose expected number of errors (computed\nfrom quality values) exceeds ERRORS.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "long", + "name" : "--max_average_error_rate", + "alternatives" : [ + "--max_aer" + ], + "description" : "as --max_expected_errors (see above), but divided by\nlength to account for reads of varying length.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--discard_trimmed", + "alternatives" : [ + "--discard" + ], + "description" : "Discard reads that contain an adapter. Use also -O to\navoid discarding too many randomly matching reads.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--discard_untrimmed", + "alternatives" : [ + "--trimmed_only" + ], + "description" : "Discard reads that do not contain an adapter.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--discard_casava", + "description" : "Discard reads that did not pass CASAVA filtering (header\nhas :Y:).\n", + "direction" : "input" + } + ] + }, + { + "name" : "Output parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--report", + "description" : "Which type of report to print: 'full' (default) or 'minimal'.\n", + "example" : [ + "full" + ], + "required" : false, + "choices" : [ + "full", + "minimal" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--json", + "description" : "Write report in JSON format to this file.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--output", + "description" : "Glob pattern for matching the expected output files.\nShould include `$output_dir`.\n", + "example" : [ + "fastq/*_001.fast[a,q]" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--fasta", + "description" : "Output FASTA to standard output even on FASTQ input.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--info_file", + "description" : "Write information about each read and its adapter matches\ninto info.txt in the output directory.\nSee the documentation for the file format.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Debug", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--debug", + "description" : "Print debug information", + "direction" : "input" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Cutadapt removes adapter sequences from high-throughput sequencing reads.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "RNA-seq", + "scRNA-seq", + "high-throughput" + ], + "license" : "MIT", + "references" : { + "doi" : [ + "10.14806/ej.17.1.200" + ] + }, + "links" : { + "repository" : "https://github.com/marcelm/cutadapt", + "homepage" : "https://cutadapt.readthedocs.io", + "documentation" : "https://cutadapt.readthedocs.io" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.12", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.1.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "python", + "user" : false, + "pip" : [ + "cutadapt" + ], + "upgrade" : true + }, + { + "type" : "docker", + "run" : [ + "cutadapt --version | sed 's/\\\\(.*\\\\)/cutadapt: \\"\\\\1\\"/' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/cutadapt/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/cutadapt", + "viash_version" : "0.9.0-RC6", + "git_commit" : "b84b29747d0635f2ac83ea63b496be9a9edb6724", + "git_remote" : "https://github.com/viash-hub/biobox" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.1.0", + "description" : "A collection of bioinformatics tools for working with sequence data.\n", + "viash_version" : "0.9.0-RC6", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.1.0'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_ADAPTER+x} ]; then echo "${VIASH_PAR_ADAPTER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapter='&'#" ; else echo "# par_adapter="; fi ) +$( if [ ! -z ${VIASH_PAR_FRONT+x} ]; then echo "${VIASH_PAR_FRONT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_front='&'#" ; else echo "# par_front="; fi ) +$( if [ ! -z ${VIASH_PAR_ANYWHERE+x} ]; then echo "${VIASH_PAR_ANYWHERE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_anywhere='&'#" ; else echo "# par_anywhere="; fi ) +$( if [ ! -z ${VIASH_PAR_ADAPTER_FASTA+x} ]; then echo "${VIASH_PAR_ADAPTER_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapter_fasta='&'#" ; else echo "# par_adapter_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_FRONT_FASTA+x} ]; then echo "${VIASH_PAR_FRONT_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_front_fasta='&'#" ; else echo "# par_front_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_ANYWHERE_FASTA+x} ]; then echo "${VIASH_PAR_ANYWHERE_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_anywhere_fasta='&'#" ; else echo "# par_anywhere_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_ADAPTER_R2+x} ]; then echo "${VIASH_PAR_ADAPTER_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapter_r2='&'#" ; else echo "# par_adapter_r2="; fi ) +$( if [ ! -z ${VIASH_PAR_FRONT_R2+x} ]; then echo "${VIASH_PAR_FRONT_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_front_r2='&'#" ; else echo "# par_front_r2="; fi ) +$( if [ ! -z ${VIASH_PAR_ANYWHERE_R2+x} ]; then echo "${VIASH_PAR_ANYWHERE_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_anywhere_r2='&'#" ; else echo "# par_anywhere_r2="; fi ) +$( if [ ! -z ${VIASH_PAR_ADAPTER_R2_FASTA+x} ]; then echo "${VIASH_PAR_ADAPTER_R2_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapter_r2_fasta='&'#" ; else echo "# par_adapter_r2_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_FRONT_R2_FASTA+x} ]; then echo "${VIASH_PAR_FRONT_R2_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_front_r2_fasta='&'#" ; else echo "# par_front_r2_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_ANYWHERE_R2_FASTA+x} ]; then echo "${VIASH_PAR_ANYWHERE_R2_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_anywhere_r2_fasta='&'#" ; else echo "# par_anywhere_r2_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIR_ADAPTERS+x} ]; then echo "${VIASH_PAR_PAIR_ADAPTERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pair_adapters='&'#" ; else echo "# par_pair_adapters="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIR_FILTER+x} ]; then echo "${VIASH_PAR_PAIR_FILTER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pair_filter='&'#" ; else echo "# par_pair_filter="; fi ) +$( if [ ! -z ${VIASH_PAR_INTERLEAVED+x} ]; then echo "${VIASH_PAR_INTERLEAVED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_interleaved='&'#" ; else echo "# par_interleaved="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_R2+x} ]; then echo "${VIASH_PAR_INPUT_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_r2='&'#" ; else echo "# par_input_r2="; fi ) +$( if [ ! -z ${VIASH_PAR_ERROR_RATE+x} ]; then echo "${VIASH_PAR_ERROR_RATE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_error_rate='&'#" ; else echo "# par_error_rate="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_INDELS+x} ]; then echo "${VIASH_PAR_NO_INDELS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_indels='&'#" ; else echo "# par_no_indels="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMES+x} ]; then echo "${VIASH_PAR_TIMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_times='&'#" ; else echo "# par_times="; fi ) +$( if [ ! -z ${VIASH_PAR_OVERLAP+x} ]; then echo "${VIASH_PAR_OVERLAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_overlap='&'#" ; else echo "# par_overlap="; fi ) +$( if [ ! -z ${VIASH_PAR_MATCH_READ_WILDCARDS+x} ]; then echo "${VIASH_PAR_MATCH_READ_WILDCARDS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_match_read_wildcards='&'#" ; else echo "# par_match_read_wildcards="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_MATCH_ADAPTER_WILDCARDS+x} ]; then echo "${VIASH_PAR_NO_MATCH_ADAPTER_WILDCARDS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_match_adapter_wildcards='&'#" ; else echo "# par_no_match_adapter_wildcards="; fi ) +$( if [ ! -z ${VIASH_PAR_ACTION+x} ]; then echo "${VIASH_PAR_ACTION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_action='&'#" ; else echo "# par_action="; fi ) +$( if [ ! -z ${VIASH_PAR_REVCOMP+x} ]; then echo "${VIASH_PAR_REVCOMP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_revcomp='&'#" ; else echo "# par_revcomp="; fi ) +$( if [ ! -z ${VIASH_PAR_CUT+x} ]; then echo "${VIASH_PAR_CUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cut='&'#" ; else echo "# par_cut="; fi ) +$( if [ ! -z ${VIASH_PAR_CUT_R2+x} ]; then echo "${VIASH_PAR_CUT_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cut_r2='&'#" ; else echo "# par_cut_r2="; fi ) +$( if [ ! -z ${VIASH_PAR_NEXTSEQ_TRIM+x} ]; then echo "${VIASH_PAR_NEXTSEQ_TRIM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nextseq_trim='&'#" ; else echo "# par_nextseq_trim="; fi ) +$( if [ ! -z ${VIASH_PAR_QUALITY_CUTOFF+x} ]; then echo "${VIASH_PAR_QUALITY_CUTOFF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quality_cutoff='&'#" ; else echo "# par_quality_cutoff="; fi ) +$( if [ ! -z ${VIASH_PAR_QUALITY_CUTOFF_R2+x} ]; then echo "${VIASH_PAR_QUALITY_CUTOFF_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quality_cutoff_r2='&'#" ; else echo "# par_quality_cutoff_r2="; fi ) +$( if [ ! -z ${VIASH_PAR_QUALITY_BASE+x} ]; then echo "${VIASH_PAR_QUALITY_BASE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quality_base='&'#" ; else echo "# par_quality_base="; fi ) +$( if [ ! -z ${VIASH_PAR_POLY_A+x} ]; then echo "${VIASH_PAR_POLY_A}" | sed "s#'#'\\"'\\"'#g;s#.*#par_poly_a='&'#" ; else echo "# par_poly_a="; fi ) +$( if [ ! -z ${VIASH_PAR_LENGTH+x} ]; then echo "${VIASH_PAR_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_length='&'#" ; else echo "# par_length="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIM_N+x} ]; then echo "${VIASH_PAR_TRIM_N}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trim_n='&'#" ; else echo "# par_trim_n="; fi ) +$( if [ ! -z ${VIASH_PAR_LENGTH_TAG+x} ]; then echo "${VIASH_PAR_LENGTH_TAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_length_tag='&'#" ; else echo "# par_length_tag="; fi ) +$( if [ ! -z ${VIASH_PAR_STRIP_SUFFIX+x} ]; then echo "${VIASH_PAR_STRIP_SUFFIX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strip_suffix='&'#" ; else echo "# par_strip_suffix="; fi ) +$( if [ ! -z ${VIASH_PAR_PREFIX+x} ]; then echo "${VIASH_PAR_PREFIX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_prefix='&'#" ; else echo "# par_prefix="; fi ) +$( if [ ! -z ${VIASH_PAR_SUFFIX+x} ]; then echo "${VIASH_PAR_SUFFIX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_suffix='&'#" ; else echo "# par_suffix="; fi ) +$( if [ ! -z ${VIASH_PAR_RENAME+x} ]; then echo "${VIASH_PAR_RENAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_rename='&'#" ; else echo "# par_rename="; fi ) +$( if [ ! -z ${VIASH_PAR_ZERO_CAP+x} ]; then echo "${VIASH_PAR_ZERO_CAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_zero_cap='&'#" ; else echo "# par_zero_cap="; fi ) +$( if [ ! -z ${VIASH_PAR_MINIMUM_LENGTH+x} ]; then echo "${VIASH_PAR_MINIMUM_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_minimum_length='&'#" ; else echo "# par_minimum_length="; fi ) +$( if [ ! -z ${VIASH_PAR_MAXIMUM_LENGTH+x} ]; then echo "${VIASH_PAR_MAXIMUM_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_maximum_length='&'#" ; else echo "# par_maximum_length="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_N+x} ]; then echo "${VIASH_PAR_MAX_N}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_n='&'#" ; else echo "# par_max_n="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_EXPECTED_ERRORS+x} ]; then echo "${VIASH_PAR_MAX_EXPECTED_ERRORS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_expected_errors='&'#" ; else echo "# par_max_expected_errors="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_AVERAGE_ERROR_RATE+x} ]; then echo "${VIASH_PAR_MAX_AVERAGE_ERROR_RATE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_average_error_rate='&'#" ; else echo "# par_max_average_error_rate="; fi ) +$( if [ ! -z ${VIASH_PAR_DISCARD_TRIMMED+x} ]; then echo "${VIASH_PAR_DISCARD_TRIMMED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_discard_trimmed='&'#" ; else echo "# par_discard_trimmed="; fi ) +$( if [ ! -z ${VIASH_PAR_DISCARD_UNTRIMMED+x} ]; then echo "${VIASH_PAR_DISCARD_UNTRIMMED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_discard_untrimmed='&'#" ; else echo "# par_discard_untrimmed="; fi ) +$( if [ ! -z ${VIASH_PAR_DISCARD_CASAVA+x} ]; then echo "${VIASH_PAR_DISCARD_CASAVA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_discard_casava='&'#" ; else echo "# par_discard_casava="; fi ) +$( if [ ! -z ${VIASH_PAR_REPORT+x} ]; then echo "${VIASH_PAR_REPORT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_report='&'#" ; else echo "# par_report="; fi ) +$( if [ ! -z ${VIASH_PAR_JSON+x} ]; then echo "${VIASH_PAR_JSON}" | sed "s#'#'\\"'\\"'#g;s#.*#par_json='&'#" ; else echo "# par_json="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTA+x} ]; then echo "${VIASH_PAR_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fasta='&'#" ; else echo "# par_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_INFO_FILE+x} ]; then echo "${VIASH_PAR_INFO_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_info_file='&'#" ; else echo "# par_info_file="; fi ) +$( if [ ! -z ${VIASH_PAR_DEBUG+x} ]; then echo "${VIASH_PAR_DEBUG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_debug='&'#" ; else echo "# par_debug="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +function debug { + [[ "\\$par_debug" == "true" ]] && echo "DEBUG: \\$@" +} + +output_dir=\\$(dirname \\$par_output) +[[ ! -d \\$output_dir ]] && mkdir -p \\$output_dir + +# Init +########################################################### + +echo ">> Paired-end data or not?" + +mode="" +if [[ -z \\$par_input_r2 ]]; then + mode="se" + echo " Single end" + input="\\$par_input" +else + echo " Paired end" + mode="pe" + input="\\$par_input \\$par_input_r2" +fi + +# Adapter arguments +# - paired and single-end +# - string and fasta +########################################################### + +function add_flags { + local arg=\\$1 + local flag=\\$2 + local prefix=\\$3 + [[ -z \\$prefix ]] && prefix="" + + # This function should not be called if the input is empty + # but check for it just in case + if [[ -z \\$arg ]]; then + return + fi + + local output="" + IFS=';' read -r -a array <<< "\\$arg" + for a in "\\${array[@]}"; do + output="\\$output \\$flag \\$prefix\\$a" + done + echo \\$output +} + +debug ">> Parsing arguments dealing with adapters" +adapter_args=\\$(echo \\\\ + \\${par_adapter:+\\$(add_flags "\\$par_adapter" "--adapter")} \\\\ + \\${par_adapter_fasta:+\\$(add_flags "\\$par_adapter_fasta" "--adapter" "file:")} \\\\ + \\${par_front:+\\$(add_flags "\\$par_front" "--front")} \\\\ + \\${par_front_fasta:+\\$(add_flags "\\$par_front_fasta" "--front" "file:")} \\\\ + \\${par_anywhere:+\\$(add_flags "\\$par_anywhere" "--anywhere")} \\\\ + \\${par_anywhere_fasta:+\\$(add_flags "\\$par_anywhere_fasta" "--anywhere" "file:")} \\\\ + \\${par_adapter_r2:+\\$(add_flags "\\$par_adapter_r2" "-A")} \\\\ + \\${par_adapter_fasta_r2:+\\$(add_flags "\\$par_adapter_fasta_r2" "-A" "file:")} \\\\ + \\${par_front_r2:+\\$(add_flags "\\$par_front_r2" "-G")} \\\\ + \\${par_front_fasta_r2:+\\$(add_flags "\\$par_front_fasta_r2" "-G" "file:")} \\\\ + \\${par_anywhere_r2:+\\$(add_flags "\\$par_anywhere_r2" "-B")} \\\\ + \\${par_anywhere_fasta_r2:+\\$(add_flags "\\$par_anywhere_fasta_r2" "-B" "file:")} \\\\ +) + +debug "Arguments to cutadapt:" +debug "\\$adapter_args" +debug + +# Paired-end options +########################################################### +echo ">> Parsing arguments for paired-end reads" +[[ "\\$par_pair_adapters" == "false" ]] && unset par_pair_adapters +[[ "\\$par_interleaved" == "false" ]] && unset par_interleaved + +paired_args=\\$(echo \\\\ + \\${par_pair_adapters:+--pair-adapters} \\\\ + \\${par_pair_filter:+--pair-filter "\\${par_pair_filter}"} \\\\ + \\${par_interleaved:+--interleaved} +) +debug "Arguments to cutadapt:" +debug \\$paired_args +debug + +# Input arguments +########################################################### +echo ">> Parsing input arguments" +[[ "\\$par_no_indels" == "true" ]] && unset par_no_indels +[[ "\\$par_match_read_wildcards" == "false" ]] && unset par_match_read_wildcards +[[ "\\$par_no_match_adapter_wildcards" == "true" ]] && unset par_no_match_adapter_wildcards +[[ "\\$par_revcomp" == "false" ]] && unset par_revcomp + +input_args=\\$(echo \\\\ + \\${par_error_rate:+--error-rate "\\${par_error_rate}"} \\\\ + \\${par_no_indels:+--no-indels} \\\\ + \\${par_times:+--times "\\${par_times}"} \\\\ + \\${par_overlap:+--overlap "\\${par_overlap}"} \\\\ + \\${par_match_read_wildcards:+--match-read-wildcards} \\\\ + \\${par_no_match_adapter_wildcards:+--no-match-adapter-wildcards} \\\\ + \\${par_action:+--action "\\${par_action}"} \\\\ + \\${par_revcomp:+--revcomp} \\\\ +) +debug "Arguments to cutadapt:" +debug \\$input_args +debug + +# Read modifications +########################################################### +echo ">> Parsing read modification arguments" +[[ "\\$par_poly_a" == "false" ]] && unset par_poly_a +[[ "\\$par_trim_n" == "false" ]] && unset par_trim_n +[[ "\\$par_zero_cap" == "false" ]] && unset par_zero_cap + +mod_args=\\$(echo \\\\ + \\${par_cut:+--cut "\\${par_cut}"} \\\\ + \\${par_cut_r2:+--cut_r2 "\\${par_cut_r2}"} \\\\ + \\${par_nextseq_trim:+--nextseq-trim "\\${par_nextseq_trim}"} \\\\ + \\${par_quality_cutoff:+--quality-cutoff "\\${par_quality_cutoff}"} \\\\ + \\${par_quality_cutoff_r2:+--quality-cutoff_r2 "\\${par_quality_cutoff_r2}"} \\\\ + \\${par_quality_base:+--quality-base "\\${par_quality_base}"} \\\\ + \\${par_poly_a:+--poly-a} \\\\ + \\${par_length:+--length "\\${par_length}"} \\\\ + \\${par_trim_n:+--trim-n} \\\\ + \\${par_length_tag:+--length-tag "\\${par_length_tag}"} \\\\ + \\${par_strip_suffix:+--strip-suffix "\\${par_strip_suffix}"} \\\\ + \\${par_prefix:+--prefix "\\${par_prefix}"} \\\\ + \\${par_suffix:+--suffix "\\${par_suffix}"} \\\\ + \\${par_rename:+--rename "\\${par_rename}"} \\\\ + \\${par_zero_cap:+--zero-cap} \\\\ +) +debug "Arguments to cutadapt:" +debug \\$mod_args +debug + +# Filtering of processed reads arguments +########################################################### +echo ">> Filtering of processed reads arguments" +[[ "\\$par_discard_trimmed" == "false" ]] && unset par_discard_trimmed +[[ "\\$par_discard_untrimmed" == "false" ]] && unset par_discard_untrimmed +[[ "\\$par_discard_casava" == "false" ]] && unset par_discard_casava + +# Parse and transform the minimum and maximum length arguments +[[ -z \\$par_minimum_length ]] + +filter_args=\\$(echo \\\\ + \\${par_minimum_length:+--minimum-length "\\${par_minimum_length}"} \\\\ + \\${par_maximum_length:+--maximum-length "\\${par_maximum_length}"} \\\\ + \\${par_max_n:+--max-n "\\${par_max_n}"} \\\\ + \\${par_max_expected_errors:+--max-expected-errors "\\${par_max_expected_errors}"} \\\\ + \\${par_max_average_error_rate:+--max-average-error-rate "\\${par_max_average_error_rate}"} \\\\ + \\${par_discard_trimmed:+--discard-trimmed} \\\\ + \\${par_discard_untrimmed:+--discard-untrimmed} \\\\ + \\${par_discard_casava:+--discard-casava} \\\\ +) +debug "Arguments to cutadapt:" +debug \\$filter_args +debug + +# Optional output arguments +########################################################### +echo ">> Optional arguments" +[[ "\\$par_json" == "false" ]] && unset par_json +[[ "\\$par_fasta" == "false" ]] && unset par_fasta +[[ "\\$par_info_file" == "false" ]] && unset par_info_file + +optional_output_args=\\$(echo \\\\ + \\${par_report:+--report "\\${par_report}"} \\\\ + \\${par_json:+--json "report.json"} \\\\ + \\${par_fasta:+--fasta} \\\\ + \\${par_info_file:+--info-file "info.txt"} \\\\ +) + +debug "Arguments to cutadapt:" +debug \\$optional_output_args +debug + +# Output arguments +# We write the output to a directory rather than +# individual files. +########################################################### + +if [[ -z \\$par_fasta ]]; then + ext="fastq" +else + ext="fasta" +fi + +if [ \\$mode = "se" ]; then + output_args=\\$(echo \\\\ + --output "\\$output_dir/{name}_001.\\$ext" \\\\ + ) +else + output_args=\\$(echo \\\\ + --output "\\$output_dir/{name}_R1_001.\\$ext" \\\\ + --paired-output "\\$output_dir/{name}_R2_001.\\$ext" \\\\ + ) +fi + +debug "Arguments to cutadapt:" +debug \\$output_args +debug + +# Full CLI +# Set the --cores argument to 0 unless meta_cpus is set +########################################################### +echo ">> Running cutadapt" +par_cpus=0 +[[ ! -z \\$meta_cpus ]] && par_cpus=\\$meta_cpus + +cli=\\$(echo \\\\ + \\$input \\\\ + \\$adapter_args \\\\ + \\$paired_args \\\\ + \\$input_args \\\\ + \\$mod_args \\\\ + \\$filter_args \\\\ + \\$optional_output_args \\\\ + \\$output_args \\\\ + --cores \\$par_cpus +) + +debug ">> Full CLI to be run:" +debug cutadapt \\$cli | sed -e 's/--/\\\\r\\\\n --/g' +debug + +cutadapt \\$cli +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def viash_par_contents = "(viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName})" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = new nextflow.script.ScriptParser(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/cutadapt", + "tag" : "v0.1.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/cutadapt/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/cutadapt/nextflow.config new file mode 100644 index 0000000..0b8b606 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/cutadapt/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'cutadapt' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.1.0' + description = 'Cutadapt removes adapter sequences from high-throughput sequencing reads.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/cutadapt/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/cutadapt/nextflow_schema.json new file mode 100644 index 0000000..2438aca --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/cutadapt/nextflow_schema.json @@ -0,0 +1,749 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "cutadapt", +"description": "Cutadapt removes adapter sequences from high-throughput sequencing reads.\n", +"type": "object", +"definitions": { + + + + "specify adapters for r1" : { + "title": "Specify Adapters for R1", + "type": "object", + "description": "No description", + "properties": { + + + "adapter": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 3\u0027 end (paired data:\nof the first read)", + "help_text": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 3\u0027 end (paired data:\nof the first read). The adapter and subsequent bases are\ntrimmed. If a \u0027$\u0027 character is appended (\u0027anchoring\u0027), the\nadapter is only found if it is a suffix of the read.\n" + + } + + + , + "front": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 5\u0027 end (paired data:\nof the first read)", + "help_text": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 5\u0027 end (paired data:\nof the first read). The adapter and any preceding bases\nare trimmed. Partial matches at the 5\u0027 end are allowed. If\na \u0027^\u0027 character is prepended (\u0027anchoring\u0027), the adapter is\nonly found if it is a prefix of the read.\n" + + } + + + , + "anywhere": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the first read)", + "help_text": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the first read). Both types of\nmatches as described under -a and -g are allowed. If the\nfirst base of the read is part of the match, the behavior\nis as with -g, otherwise as with -a. This option is mostly\nfor rescuing failed library preparations - do not use if\nyou know which end your adapter was ligated to!\n" + + } + + +} +}, + + + "specify adapters using fasta files for r1" : { + "title": "Specify Adapters using Fasta files for R1", + "type": "object", + "description": "No description", + "properties": { + + + "adapter_fasta": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\":\"`. Fasta file containing sequences of an adapter ligated to the 3\u0027 end (paired data:\nof the first read)", + "help_text": "Type: List of `file`, multiple_sep: `\":\"`. Fasta file containing sequences of an adapter ligated to the 3\u0027 end (paired data:\nof the first read). The adapter and subsequent bases are\ntrimmed. If a \u0027$\u0027 character is appended (\u0027anchoring\u0027), the\nadapter is only found if it is a suffix of the read.\n" + + } + + + , + "front_fasta": { + "type": + "string", + "description": "Type: `file`. Fasta file containing sequences of an adapter ligated to the 5\u0027 end (paired data:\nof the first read)", + "help_text": "Type: `file`. Fasta file containing sequences of an adapter ligated to the 5\u0027 end (paired data:\nof the first read). The adapter and any preceding bases\nare trimmed. Partial matches at the 5\u0027 end are allowed. If\na \u0027^\u0027 character is prepended (\u0027anchoring\u0027), the adapter is\nonly found if it is a prefix of the read.\n" + + } + + + , + "anywhere_fasta": { + "type": + "string", + "description": "Type: `file`. Fasta file containing sequences of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the first read)", + "help_text": "Type: `file`. Fasta file containing sequences of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the first read). Both types of\nmatches as described under -a and -g are allowed. If the\nfirst base of the read is part of the match, the behavior\nis as with -g, otherwise as with -a. This option is mostly\nfor rescuing failed library preparations - do not use if\nyou know which end your adapter was ligated to!\n" + + } + + +} +}, + + + "specify adapters for r2" : { + "title": "Specify Adapters for R2", + "type": "object", + "description": "No description", + "properties": { + + + "adapter_r2": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 3\u0027 end (paired data:\nof the first read)", + "help_text": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 3\u0027 end (paired data:\nof the first read). The adapter and subsequent bases are\ntrimmed. If a \u0027$\u0027 character is appended (\u0027anchoring\u0027), the\nadapter is only found if it is a suffix of the read.\n" + + } + + + , + "front_r2": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 5\u0027 end (paired data:\nof the first read)", + "help_text": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter ligated to the 5\u0027 end (paired data:\nof the first read). The adapter and any preceding bases\nare trimmed. Partial matches at the 5\u0027 end are allowed. If\na \u0027^\u0027 character is prepended (\u0027anchoring\u0027), the adapter is\nonly found if it is a prefix of the read.\n" + + } + + + , + "anywhere_r2": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the first read)", + "help_text": "Type: List of `string`, multiple_sep: `\":\"`. Sequence of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the first read). Both types of\nmatches as described under -a and -g are allowed. If the\nfirst base of the read is part of the match, the behavior\nis as with -g, otherwise as with -a. This option is mostly\nfor rescuing failed library preparations - do not use if\nyou know which end your adapter was ligated to!\n" + + } + + +} +}, + + + "specify adapters using fasta files for r2" : { + "title": "Specify Adapters using Fasta files for R2", + "type": "object", + "description": "No description", + "properties": { + + + "adapter_r2_fasta": { + "type": + "string", + "description": "Type: `file`. Fasta file containing sequences of an adapter ligated to the 3\u0027 end (paired data:\nof the first read)", + "help_text": "Type: `file`. Fasta file containing sequences of an adapter ligated to the 3\u0027 end (paired data:\nof the first read). The adapter and subsequent bases are\ntrimmed. If a \u0027$\u0027 character is appended (\u0027anchoring\u0027), the\nadapter is only found if it is a suffix of the read.\n" + + } + + + , + "front_r2_fasta": { + "type": + "string", + "description": "Type: `file`. Fasta file containing sequences of an adapter ligated to the 5\u0027 end (paired data:\nof the first read)", + "help_text": "Type: `file`. Fasta file containing sequences of an adapter ligated to the 5\u0027 end (paired data:\nof the first read). The adapter and any preceding bases\nare trimmed. Partial matches at the 5\u0027 end are allowed. If\na \u0027^\u0027 character is prepended (\u0027anchoring\u0027), the adapter is\nonly found if it is a prefix of the read.\n" + + } + + + , + "anywhere_r2_fasta": { + "type": + "string", + "description": "Type: `file`. Fasta file containing sequences of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the first read)", + "help_text": "Type: `file`. Fasta file containing sequences of an adapter that may be ligated to the 5\u0027 or 3\u0027\nend (paired data: of the first read). Both types of\nmatches as described under -a and -g are allowed. If the\nfirst base of the read is part of the match, the behavior\nis as with -g, otherwise as with -a. This option is mostly\nfor rescuing failed library preparations - do not use if\nyou know which end your adapter was ligated to!\n" + + } + + +} +}, + + + "paired-end options" : { + "title": "Paired-end options", + "type": "object", + "description": "No description", + "properties": { + + + "pair_adapters": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Treat adapters given with -a/-A etc", + "help_text": "Type: `boolean_true`, default: `false`. Treat adapters given with -a/-A etc. as pairs. Either both\nor none are removed from each read pair.\n" + , + "default": "False" + } + + + , + "pair_filter": { + "type": + "string", + "description": "Type: `string`, choices: ``any`, `both`, `first``. Which of the reads in a paired-end read have to match the\nfiltering criterion in order for the pair to be filtered", + "help_text": "Type: `string`, choices: ``any`, `both`, `first``. Which of the reads in a paired-end read have to match the\nfiltering criterion in order for the pair to be filtered.\n", + "enum": ["any", "both", "first"] + + + } + + + , + "interleaved": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Read and/or write interleaved paired-end reads", + "help_text": "Type: `boolean_true`, default: `false`. Read and/or write interleaved paired-end reads.\n" + , + "default": "False" + } + + +} +}, + + + "input parameters" : { + "title": "Input parameters", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input fastq file for single-end reads or R1 for paired-end reads", + "help_text": "Type: `file`, required. Input fastq file for single-end reads or R1 for paired-end reads.\n" + + } + + + , + "input_r2": { + "type": + "string", + "description": "Type: `file`. Input fastq file for R2 in the case of paired-end reads", + "help_text": "Type: `file`. Input fastq file for R2 in the case of paired-end reads.\n" + + } + + + , + "error_rate": { + "type": + "number", + "description": "Type: `double`, example: `0.1`. Maximum allowed error rate (if 0 \u003c= E \u003c 1), or absolute\nnumber of errors for full-length adapter match (if E is an\ninteger \u003e= 1)", + "help_text": "Type: `double`, example: `0.1`. Maximum allowed error rate (if 0 \u003c= E \u003c 1), or absolute\nnumber of errors for full-length adapter match (if E is an\ninteger \u003e= 1). Error rate = no. of errors divided by\nlength of matching region. Default: 0.1 (10%).\n" + + } + + + , + "no_indels": { + "type": + "boolean", + "description": "Type: `boolean_false`, default: `true`. Allow only mismatches in alignments", + "help_text": "Type: `boolean_false`, default: `true`. Allow only mismatches in alignments.\n" + , + "default": "True" + } + + + , + "times": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Remove up to COUNT adapters from each read", + "help_text": "Type: `integer`, example: `1`. Remove up to COUNT adapters from each read. Default: 1.\n" + + } + + + , + "overlap": { + "type": + "integer", + "description": "Type: `integer`, example: `3`. Require MINLENGTH overlap between read and adapter for an\nadapter to be found", + "help_text": "Type: `integer`, example: `3`. Require MINLENGTH overlap between read and adapter for an\nadapter to be found. The default is 3.\n" + + } + + + , + "match_read_wildcards": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Interpret IUPAC wildcards in reads", + "help_text": "Type: `boolean_true`, default: `false`. Interpret IUPAC wildcards in reads.\n" + , + "default": "False" + } + + + , + "no_match_adapter_wildcards": { + "type": + "boolean", + "description": "Type: `boolean_false`, default: `true`. Do not interpret IUPAC wildcards in adapters", + "help_text": "Type: `boolean_false`, default: `true`. Do not interpret IUPAC wildcards in adapters.\n" + , + "default": "True" + } + + + , + "action": { + "type": + "string", + "description": "Type: `string`, example: `trim`, choices: ``trim`, `retain`, `mask`, `lowercase`, `none``. What to do if a match was found", + "help_text": "Type: `string`, example: `trim`, choices: ``trim`, `retain`, `mask`, `lowercase`, `none``. What to do if a match was found. trim: trim adapter and\nup- or downstream sequence; retain: trim, but retain\nadapter; mask: replace with \u0027N\u0027 characters; lowercase:\nconvert to lowercase; none: leave unchanged.\nThe default is trim.\n", + "enum": ["trim", "retain", "mask", "lowercase", "none"] + + + } + + + , + "revcomp": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Check both the read and its reverse complement for adapter\nmatches", + "help_text": "Type: `boolean_true`, default: `false`. Check both the read and its reverse complement for adapter\nmatches. If match is on reverse-complemented version,\noutput that one.\n" + , + "default": "False" + } + + +} +}, + + + "read modifications" : { + "title": "Read modifications", + "type": "object", + "description": "No description", + "properties": { + + + "cut": { + "type": + "string", + "description": "Type: List of `integer`, multiple_sep: `\":\"`. Remove LEN bases from each read (or R1 if paired; use --cut_r2\noption for R2)", + "help_text": "Type: List of `integer`, multiple_sep: `\":\"`. Remove LEN bases from each read (or R1 if paired; use --cut_r2\noption for R2). If LEN is positive, remove bases from the\nbeginning. If LEN is negative, remove bases from the end.\nCan be used twice if LENs have different signs. Applied\n*before* adapter trimming.\n" + + } + + + , + "cut_r2": { + "type": + "string", + "description": "Type: List of `integer`, multiple_sep: `\":\"`. Remove LEN bases from each read (for R2)", + "help_text": "Type: List of `integer`, multiple_sep: `\":\"`. Remove LEN bases from each read (for R2). If LEN is positive, remove bases from the\nbeginning. If LEN is negative, remove bases from the end.\nCan be used twice if LENs have different signs. Applied\n*before* adapter trimming.\n" + + } + + + , + "nextseq_trim": { + "type": + "string", + "description": "Type: `string`. NextSeq-specific quality trimming (each read)", + "help_text": "Type: `string`. NextSeq-specific quality trimming (each read). Trims also\ndark cycles appearing as high-quality G bases.\n" + + } + + + , + "quality_cutoff": { + "type": + "string", + "description": "Type: `string`. Trim low-quality bases from 5\u0027 and/or 3\u0027 ends of each read\nbefore adapter removal", + "help_text": "Type: `string`. Trim low-quality bases from 5\u0027 and/or 3\u0027 ends of each read\nbefore adapter removal. Applied to both reads if data is\npaired. If one value is given, only the 3\u0027 end is trimmed.\nIf two comma-separated cutoffs are given, the 5\u0027 end is\ntrimmed with the first cutoff, the 3\u0027 end with the second.\n" + + } + + + , + "quality_cutoff_r2": { + "type": + "string", + "description": "Type: `string`. Quality-trimming cutoff for R2", + "help_text": "Type: `string`. Quality-trimming cutoff for R2. Default: same as for R1\n" + + } + + + , + "quality_base": { + "type": + "integer", + "description": "Type: `integer`, example: `33`. Assume that quality values in FASTQ are encoded as\nascii(quality + N)", + "help_text": "Type: `integer`, example: `33`. Assume that quality values in FASTQ are encoded as\nascii(quality + N). This needs to be set to 64 for some\nold Illumina FASTQ files. The default is 33.\n" + + } + + + , + "poly_a": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Trim poly-A tails", + "help_text": "Type: `boolean_true`, default: `false`. Trim poly-A tails" + , + "default": "False" + } + + + , + "length": { + "type": + "integer", + "description": "Type: `integer`. Shorten reads to LENGTH", + "help_text": "Type: `integer`. Shorten reads to LENGTH. Positive values remove bases at\nthe end while negative ones remove bases at the beginning.\nThis and the following modifications are applied after\nadapter trimming.\n" + + } + + + , + "trim_n": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Trim N\u0027s on ends of reads", + "help_text": "Type: `boolean_true`, default: `false`. Trim N\u0027s on ends of reads." + , + "default": "False" + } + + + , + "length_tag": { + "type": + "string", + "description": "Type: `string`, example: `length=`. Search for TAG followed by a decimal number in the\ndescription field of the read", + "help_text": "Type: `string`, example: `length=`. Search for TAG followed by a decimal number in the\ndescription field of the read. Replace the decimal number\nwith the correct length of the trimmed read. For example,\nuse --length-tag \u0027length=\u0027 to correct fields like\n\u0027length=123\u0027.\n" + + } + + + , + "strip_suffix": { + "type": + "string", + "description": "Type: `string`. Remove this suffix from read names if present", + "help_text": "Type: `string`. Remove this suffix from read names if present. Can be\ngiven multiple times.\n" + + } + + + , + "prefix": { + "type": + "string", + "description": "Type: `string`. Add this prefix to read names", + "help_text": "Type: `string`. Add this prefix to read names. Use {name} to insert the\nname of the matching adapter.\n" + + } + + + , + "suffix": { + "type": + "string", + "description": "Type: `string`. Add this suffix to read names; can also include {name}\n", + "help_text": "Type: `string`. Add this suffix to read names; can also include {name}\n" + + } + + + , + "rename": { + "type": + "string", + "description": "Type: `string`. Rename reads using TEMPLATE containing variables such as\n{id}, {adapter_name} etc", + "help_text": "Type: `string`. Rename reads using TEMPLATE containing variables such as\n{id}, {adapter_name} etc. (see documentation)\n" + + } + + + , + "zero_cap": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Change negative quality values to zero", + "help_text": "Type: `boolean_true`, default: `false`. Change negative quality values to zero." + , + "default": "False" + } + + +} +}, + + + "filtering of processed reads" : { + "title": "Filtering of processed reads", + "type": "object", + "description": "Filters are applied after above read modifications. Paired-end reads are\nalways discarded pairwise (see also --pair_filter).\n", + "properties": { + + + "minimum_length": { + "type": + "string", + "description": "Type: `string`, example: `0`. Discard reads shorter than LEN", + "help_text": "Type: `string`, example: `0`. Discard reads shorter than LEN. Default is 0.\nWhen trimming paired-end reads, the minimum lengths for R1 and R2 can be specified separately by separating them with a colon (:).\nIf the colon syntax is not used, the same minimum length applies to both reads, as discussed above.\nAlso, one of the values can be omitted to impose no restrictions.\nFor example, with -m 17:, the length of R1 must be at least 17, but the length of R2 is ignored.\n" + + } + + + , + "maximum_length": { + "type": + "string", + "description": "Type: `string`. Discard reads longer than LEN", + "help_text": "Type: `string`. Discard reads longer than LEN. Default: no limit.\nFor paired reads, see the remark for --minimum_length\n" + + } + + + , + "max_n": { + "type": + "string", + "description": "Type: `string`. Discard reads with more than COUNT \u0027N\u0027 bases", + "help_text": "Type: `string`. Discard reads with more than COUNT \u0027N\u0027 bases. If COUNT is\na number between 0 and 1, it is interpreted as a fraction\nof the read length.\n" + + } + + + , + "max_expected_errors": { + "type": + "string", + "description": "Type: `long`. Discard reads whose expected number of errors (computed\nfrom quality values) exceeds ERRORS", + "help_text": "Type: `long`. Discard reads whose expected number of errors (computed\nfrom quality values) exceeds ERRORS.\n" + + } + + + , + "max_average_error_rate": { + "type": + "string", + "description": "Type: `long`. as --max_expected_errors (see above), but divided by\nlength to account for reads of varying length", + "help_text": "Type: `long`. as --max_expected_errors (see above), but divided by\nlength to account for reads of varying length.\n" + + } + + + , + "discard_trimmed": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Discard reads that contain an adapter", + "help_text": "Type: `boolean_true`, default: `false`. Discard reads that contain an adapter. Use also -O to\navoid discarding too many randomly matching reads.\n" + , + "default": "False" + } + + + , + "discard_untrimmed": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Discard reads that do not contain an adapter", + "help_text": "Type: `boolean_true`, default: `false`. Discard reads that do not contain an adapter.\n" + , + "default": "False" + } + + + , + "discard_casava": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Discard reads that did not pass CASAVA filtering (header\nhas :Y:)", + "help_text": "Type: `boolean_true`, default: `false`. Discard reads that did not pass CASAVA filtering (header\nhas :Y:).\n" + , + "default": "False" + } + + +} +}, + + + "output parameters" : { + "title": "Output parameters", + "type": "object", + "description": "No description", + "properties": { + + + "report": { + "type": + "string", + "description": "Type: `string`, example: `full`, choices: ``full`, `minimal``. Which type of report to print: \u0027full\u0027 (default) or \u0027minimal\u0027", + "help_text": "Type: `string`, example: `full`, choices: ``full`, `minimal``. Which type of report to print: \u0027full\u0027 (default) or \u0027minimal\u0027.\n", + "enum": ["full", "minimal"] + + + } + + + , + "json": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Write report in JSON format to this file", + "help_text": "Type: `boolean_true`, default: `false`. Write report in JSON format to this file.\n" + , + "default": "False" + } + + + , + "output": { + "type": + "string", + "description": "Type: List of `file`, required, default: `$id.$key.output_*.fast[a,q]`, example: `fastq/*_001.fast[a,q]`, multiple_sep: `\":\"`. Glob pattern for matching the expected output files", + "help_text": "Type: List of `file`, required, default: `$id.$key.output_*.fast[a,q]`, example: `fastq/*_001.fast[a,q]`, multiple_sep: `\":\"`. Glob pattern for matching the expected output files.\nShould include `$output_dir`.\n" + , + "default": "$id.$key.output_*.fast[a,q]" + } + + + , + "fasta": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output FASTA to standard output even on FASTQ input", + "help_text": "Type: `boolean_true`, default: `false`. Output FASTA to standard output even on FASTQ input.\n" + , + "default": "False" + } + + + , + "info_file": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Write information about each read and its adapter matches\ninto info", + "help_text": "Type: `boolean_true`, default: `false`. Write information about each read and its adapter matches\ninto info.txt in the output directory.\nSee the documentation for the file format.\n" + , + "default": "False" + } + + +} +}, + + + "debug" : { + "title": "Debug", + "type": "object", + "description": "No description", + "properties": { + + + "debug": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Print debug information", + "help_text": "Type: `boolean_true`, default: `false`. Print debug information" + , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/specify adapters for r1" + }, + + { + "$ref": "#/definitions/specify adapters using fasta files for r1" + }, + + { + "$ref": "#/definitions/specify adapters for r2" + }, + + { + "$ref": "#/definitions/specify adapters using fasta files for r2" + }, + + { + "$ref": "#/definitions/paired-end options" + }, + + { + "$ref": "#/definitions/input parameters" + }, + + { + "$ref": "#/definitions/read modifications" + }, + + { + "$ref": "#/definitions/filtering of processed reads" + }, + + { + "$ref": "#/definitions/output parameters" + }, + + { + "$ref": "#/definitions/debug" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/falco/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/falco/.config.vsh.yaml new file mode 100644 index 0000000..c7e812b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/falco/.config.vsh.yaml @@ -0,0 +1,330 @@ +name: "falco" +version: "v0.1.0" +argument_groups: +- name: "Input arguments" + arguments: + - type: "file" + name: "--input" + description: "input fastq files" + info: null + example: + - "input1.fastq;input2.fastq" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Run arguments" + arguments: + - type: "boolean_true" + name: "--nogroup" + description: "Disable grouping of bases for reads >50bp. \nAll reports will show\ + \ data for every base in \nthe read. WARNING: When using this option, \nyour\ + \ plots may end up a ridiculous size. You \nhave been warned!\n" + info: null + direction: "input" + - type: "file" + name: "--contaminents" + description: "Specifies a non-default file which contains \nthe list of contaminants\ + \ to screen \noverrepresented sequences against. The file \nmust contain sets\ + \ of named contaminants in \nthe form name[tab]sequence. Lines prefixed \nwith\ + \ a hash will be ignored. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/contaminant_list.txt\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--adapters" + description: "Specifies a non-default file which contains \nthe list of adapter\ + \ sequences which will be \nexplicity searched against the library. The \nfile\ + \ must contain sets of named adapters in \nthe form name[tab]sequence. Lines\ + \ prefixed \nwith a hash will be ignored. Default:\nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/adapter_list.txt\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--limits" + description: "Specifies a non-default file which contains \na set of criteria\ + \ which will be used to \ndetermine the warn/error limits for the \nvarious\ + \ modules. This file can also be used \nto selectively remove some modules from\ + \ the \noutput all together. The format needs to \nmirror the default limits.txt\ + \ file found in \nthe Configuration folder. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/limits.txt\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--subsample" + alternatives: + - "-s" + description: "[Falco only] makes falco faster (but \npossibly less accurate) by\ + \ only processing \nreads that are a multiple of this value (using \n0-based\ + \ indexing to number reads).\n" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--bisulfite" + alternatives: + - "-b" + description: "[Falco only] reads are whole genome \nbisulfite sequencing, and\ + \ more Ts and fewer \nCs are therefore expected and will be \naccounted for\ + \ in base content.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--reverse_complliment" + alternatives: + - "-r" + description: "[Falco only] The input is a \nreverse-complement. All modules will\ + \ be \ntested by swapping A/T and C/G\n" + info: null + direction: "input" +- name: "Output arguments" + arguments: + - type: "file" + name: "--outdir" + alternatives: + - "-o" + description: "Create all output files in the specified \noutput directory. FALCO-SPECIFIC:\ + \ If the \ndirectory does not exists, the program will \ncreate it.\n" + info: null + example: + - "output" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--format" + alternatives: + - "-f" + description: "Bypasses the normal sequence file format \ndetection and forces\ + \ the program to use the \nspecified format. Validformats are bam, sam, \nbam_mapped,\ + \ sam_mapped, fastq, fq, fastq.gz \nor fq.gz.\n" + info: null + required: false + choices: + - "bam" + - "sam" + - "bam_mapped" + - "sam_mapped" + - "fastq" + - "fq" + - "fastq.gz" + - "fq.gz" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--data_filename" + alternatives: + - "-D" + description: "[Falco only] Specify filename for FastQC \ndata output (TXT). If\ + \ not specified, it will \nbe called fastq_data.txt in either the input \nfile's\ + \ directory or the one specified in the \n--output flag. Only available when\ + \ running \nfalco with a single input.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--report_filename" + alternatives: + - "-R" + description: "[Falco only] Specify filename for FastQC \nreport output (HTML).\ + \ If not specified, it \nwill be called fastq_report.html in either \nthe input\ + \ file's directory or the one \nspecified in the --output flag. Only \navailable\ + \ when running falco with a single \ninput.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--summary_filename" + alternatives: + - "-S" + description: "[Falco only] Specify filename for the short \nsummary output (TXT).\ + \ If not specified, it \nwill be called fastq_report.html in either \nthe input\ + \ file's directory or the one \nspecified in the --output flag. Only \navailable\ + \ when running falco with a single \ninput.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "A C++ drop-in replacement of FastQC to assess the quality of sequence\ + \ read data" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +requirements: + commands: + - "ps" +keywords: +- "qc" +- "fastqc" +- "sequencing" +license: "GPL-3.0" +references: + doi: + - "10.12688/f1000research.21142.2" +links: + repository: "https://github.com/smithlabcode/falco" + documentation: "https://falco.readthedocs.io/en/latest/" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "debian:trixie-slim" + target_registry: "images.viash-hub.com" + target_tag: "v0.1.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "wget" + - "build-essential" + - "g++" + - "zlib1g-dev" + - "procps" + interactive: false + - type: "docker" + run: + - "wget https://github.com/smithlabcode/falco/releases/download/v1.2.2/falco-1.2.2.tar.gz\ + \ -O /tmp/falco.tar.gz && \\\ncd /tmp && \\\ntar xvf falco.tar.gz && \\\ncd\ + \ falco-1.2.2 && \\\n./configure && \\\nmake all && \\\nmake install\n" + - type: "docker" + run: + - "echo \"falco: \\\"$(falco -v | sed -n 's/^falco //p')\\\"\" > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/falco/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/falco" + executable: "target/nextflow/falco/main.nf" + viash_version: "0.9.0-RC6" + git_commit: "b84b29747d0635f2ac83ea63b496be9a9edb6724" + git_remote: "https://github.com/viash-hub/biobox" +package_config: + name: "biobox" + version: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null + viash_version: "0.9.0-RC6" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.1.0'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/falco/main.nf b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/falco/main.nf new file mode 100644 index 0000000..d0389df --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/falco/main.nf @@ -0,0 +1,3696 @@ +// falco v0.1.0 +// +// This wrapper script is auto-generated by viash 0.9.0-RC6 and is thus a +// derivative work thereof. This software comes with ABSOLUTELY NO WARRANTY from +// Data Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value instanceof String) { + try { + value = value.toInteger() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigInteger) { + value = value.intValue() + } + expectedClass = value instanceof Integer ? null : "Integer" + } else if (par.type == "long") { + // cast to long if need be + if (value instanceof String) { + try { + value = value.toLong() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof Integer) { + value = value.toLong() + } + expectedClass = value instanceof Long ? null : "Long" + } else if (par.type == "double") { + // cast to double if need be + if (value instanceof String) { + try { + value = value.toDouble() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigDecimal) { + value = value.doubleValue() + } + if (value instanceof Float) { + value = value.toDouble() + } + expectedClass = value instanceof Double ? null : "Double" + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value instanceof String) { + def valueLower = value.toLowerCase() + if (valueLower == "true") { + value = true + } else if (valueLower == "false") { + value = false + } + } + expectedClass = value instanceof Boolean ? null : "Boolean" + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required) { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _processOutputValues(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(";") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey,newKey:oldKey'" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{[yamlFile] + outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ +mkdir -p "\$(dirname '${yamlFile}')" +echo "Storing state as yaml" +echo '${yamlBlob}' > '${yamlFile}' +echo "Copying output files to destination folder" +${copyCommands.join("\n ")} +""" +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (key, value) are the tuples that will be saved to the state.yaml file + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = val instanceof File ? val.toPath() : val + [value: value_, inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + def chModifiedFiltered = workflowArgs.filter ? + chModified | filter{workflowArgs.filter(it)} : + chModified + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModifiedFiltered.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModifiedFiltered + chPassthrough = Channel.empty() + } + + def chArgs = workflowArgs.fromState ? + chRun | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRun | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutput = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + // check output tuple + | map { id_, output_ -> + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _processOutputValues(output_, meta.config, id_, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { + output_ = output_.values()[0] + } + + [join_id, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chInitialOutput, chModifiedFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublish = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublish, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + + // remove join_id and meta + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "falco", + "version" : "v0.1.0", + "argument_groups" : [ + { + "name" : "Input arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "input fastq files", + "example" : [ + "input1.fastq;input2.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Run arguments", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--nogroup", + "description" : "Disable grouping of bases for reads >50bp. \nAll reports will show data for every base in \nthe read. WARNING: When using this option, \nyour plots may end up a ridiculous size. You \nhave been warned!\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--contaminents", + "description" : "Specifies a non-default file which contains \nthe list of contaminants to screen \noverrepresented sequences against. The file \nmust contain sets of named contaminants in \nthe form name[tab]sequence. Lines prefixed \nwith a hash will be ignored. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/contaminant_list.txt\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--adapters", + "description" : "Specifies a non-default file which contains \nthe list of adapter sequences which will be \nexplicity searched against the library. The \nfile must contain sets of named adapters in \nthe form name[tab]sequence. Lines prefixed \nwith a hash will be ignored. Default:\nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/adapter_list.txt\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--limits", + "description" : "Specifies a non-default file which contains \na set of criteria which will be used to \ndetermine the warn/error limits for the \nvarious modules. This file can also be used \nto selectively remove some modules from the \noutput all together. The format needs to \nmirror the default limits.txt file found in \nthe Configuration folder. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/limits.txt\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--subsample", + "alternatives" : [ + "-s" + ], + "description" : "[Falco only] makes falco faster (but \npossibly less accurate) by only processing \nreads that are a multiple of this value (using \n0-based indexing to number reads).\n", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--bisulfite", + "alternatives" : [ + "-b" + ], + "description" : "[Falco only] reads are whole genome \nbisulfite sequencing, and more Ts and fewer \nCs are therefore expected and will be \naccounted for in base content.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--reverse_complliment", + "alternatives" : [ + "-r" + ], + "description" : "[Falco only] The input is a \nreverse-complement. All modules will be \ntested by swapping A/T and C/G\n", + "direction" : "input" + } + ] + }, + { + "name" : "Output arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--outdir", + "alternatives" : [ + "-o" + ], + "description" : "Create all output files in the specified \noutput directory. FALCO-SPECIFIC: If the \ndirectory does not exists, the program will \ncreate it.\n", + "example" : [ + "output" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--format", + "alternatives" : [ + "-f" + ], + "description" : "Bypasses the normal sequence file format \ndetection and forces the program to use the \nspecified format. Validformats are bam, sam, \nbam_mapped, sam_mapped, fastq, fq, fastq.gz \nor fq.gz.\n", + "required" : false, + "choices" : [ + "bam", + "sam", + "bam_mapped", + "sam_mapped", + "fastq", + "fq", + "fastq.gz", + "fq.gz" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--data_filename", + "alternatives" : [ + "-D" + ], + "description" : "[Falco only] Specify filename for FastQC \ndata output (TXT). If not specified, it will \nbe called fastq_data.txt in either the input \nfile's directory or the one specified in the \n--output flag. Only available when running \nfalco with a single input.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--report_filename", + "alternatives" : [ + "-R" + ], + "description" : "[Falco only] Specify filename for FastQC \nreport output (HTML). If not specified, it \nwill be called fastq_report.html in either \nthe input file's directory or the one \nspecified in the --output flag. Only \navailable when running falco with a single \ninput.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--summary_filename", + "alternatives" : [ + "-S" + ], + "description" : "[Falco only] Specify filename for the short \nsummary output (TXT). If not specified, it \nwill be called fastq_report.html in either \nthe input file's directory or the one \nspecified in the --output flag. Only \navailable when running falco with a single \ninput.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "A C++ drop-in replacement of FastQC to assess the quality of sequence read data", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "qc", + "fastqc", + "sequencing" + ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.12688/f1000research.21142.2" + ] + }, + "links" : { + "repository" : "https://github.com/smithlabcode/falco", + "documentation" : "https://falco.readthedocs.io/en/latest/" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "debian:trixie-slim", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.1.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "wget", + "build-essential", + "g++", + "zlib1g-dev", + "procps" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "wget https://github.com/smithlabcode/falco/releases/download/v1.2.2/falco-1.2.2.tar.gz -O /tmp/falco.tar.gz && \\\\\ncd /tmp && \\\\\ntar xvf falco.tar.gz && \\\\\ncd falco-1.2.2 && \\\\\n./configure && \\\\\nmake all && \\\\\nmake install\n" + ] + }, + { + "type" : "docker", + "run" : [ + "echo \\"falco: \\\\\\"$(falco -v | sed -n 's/^falco //p')\\\\\\"\\" > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/falco/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/falco", + "viash_version" : "0.9.0-RC6", + "git_commit" : "b84b29747d0635f2ac83ea63b496be9a9edb6724", + "git_remote" : "https://github.com/viash-hub/biobox" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.1.0", + "description" : "A collection of bioinformatics tools for working with sequence data.\n", + "viash_version" : "0.9.0-RC6", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.1.0'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_NOGROUP+x} ]; then echo "${VIASH_PAR_NOGROUP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nogroup='&'#" ; else echo "# par_nogroup="; fi ) +$( if [ ! -z ${VIASH_PAR_CONTAMINENTS+x} ]; then echo "${VIASH_PAR_CONTAMINENTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_contaminents='&'#" ; else echo "# par_contaminents="; fi ) +$( if [ ! -z ${VIASH_PAR_ADAPTERS+x} ]; then echo "${VIASH_PAR_ADAPTERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapters='&'#" ; else echo "# par_adapters="; fi ) +$( if [ ! -z ${VIASH_PAR_LIMITS+x} ]; then echo "${VIASH_PAR_LIMITS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_limits='&'#" ; else echo "# par_limits="; fi ) +$( if [ ! -z ${VIASH_PAR_SUBSAMPLE+x} ]; then echo "${VIASH_PAR_SUBSAMPLE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_subsample='&'#" ; else echo "# par_subsample="; fi ) +$( if [ ! -z ${VIASH_PAR_BISULFITE+x} ]; then echo "${VIASH_PAR_BISULFITE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bisulfite='&'#" ; else echo "# par_bisulfite="; fi ) +$( if [ ! -z ${VIASH_PAR_REVERSE_COMPLLIMENT+x} ]; then echo "${VIASH_PAR_REVERSE_COMPLLIMENT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reverse_complliment='&'#" ; else echo "# par_reverse_complliment="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTDIR+x} ]; then echo "${VIASH_PAR_OUTDIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_outdir='&'#" ; else echo "# par_outdir="; fi ) +$( if [ ! -z ${VIASH_PAR_FORMAT+x} ]; then echo "${VIASH_PAR_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_format='&'#" ; else echo "# par_format="; fi ) +$( if [ ! -z ${VIASH_PAR_DATA_FILENAME+x} ]; then echo "${VIASH_PAR_DATA_FILENAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_data_filename='&'#" ; else echo "# par_data_filename="; fi ) +$( if [ ! -z ${VIASH_PAR_REPORT_FILENAME+x} ]; then echo "${VIASH_PAR_REPORT_FILENAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_report_filename='&'#" ; else echo "# par_report_filename="; fi ) +$( if [ ! -z ${VIASH_PAR_SUMMARY_FILENAME+x} ]; then echo "${VIASH_PAR_SUMMARY_FILENAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_summary_filename='&'#" ; else echo "# par_summary_filename="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +[[ "\\$par_nogroup" == "false" ]] && unset par_nogroup +[[ "\\$par_bisulfite" == "false" ]] && unset par_bisulfite +[[ "\\$par_reverse_compliment" == "false" ]] && unset par_reverse_compliment + +IFS=";" read -ra input <<< \\$par_input + +\\$(which falco) \\\\ + \\${par_nogroup:+--nogroup} \\\\ + \\${par_contaminants:+--contaminants "\\$par_contaminants"} \\\\ + \\${par_adapters:+--adapters "\\$par_adapters"} \\\\ + \\${par_limits:+--limits "\\$par_limits"} \\\\ + \\${par_subsample:+-subsample \\$par_subsample} \\\\ + \\${par_bisulfite:+-bisulfite} \\\\ + \\${par_reverse_compliment:+-reverse-compliment} \\\\ + \\${par_outdir:+--outdir "\\$par_outdir"} \\\\ + \\${par_format:+--format "\\$par_format"} \\\\ + \\${par_data_filename:+-data-filename "\\$par_data_filename"} \\\\ + \\${par_report_filename:+-report-filename "\\$par_report_filename"} \\\\ + \\${par_summary_filename:+-summary-filename "\\$par_summary_filename"} \\\\ + \\${input[*]} +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def viash_par_contents = "(viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName})" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = new nextflow.script.ScriptParser(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/falco", + "tag" : "v0.1.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/falco/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/falco/nextflow.config new file mode 100644 index 0000000..3ac4103 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/falco/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'falco' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.1.0' + description = 'A C++ drop-in replacement of FastQC to assess the quality of sequence read data' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/falco/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/falco/nextflow_schema.json new file mode 100644 index 0000000..71b5872 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/falco/nextflow_schema.json @@ -0,0 +1,227 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "falco", +"description": "A C++ drop-in replacement of FastQC to assess the quality of sequence read data", +"type": "object", +"definitions": { + + + + "input arguments" : { + "title": "Input arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `input1.fastq;input2.fastq`, multiple_sep: `\":\"`. input fastq files", + "help_text": "Type: List of `file`, required, example: `input1.fastq;input2.fastq`, multiple_sep: `\":\"`. input fastq files" + + } + + +} +}, + + + "run arguments" : { + "title": "Run arguments", + "type": "object", + "description": "No description", + "properties": { + + + "nogroup": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp", + "help_text": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp. \nAll reports will show data for every base in \nthe read. WARNING: When using this option, \nyour plots may end up a ridiculous size. You \nhave been warned!\n" + , + "default": "False" + } + + + , + "contaminents": { + "type": + "string", + "description": "Type: `file`. Specifies a non-default file which contains \nthe list of contaminants to screen \noverrepresented sequences against", + "help_text": "Type: `file`. Specifies a non-default file which contains \nthe list of contaminants to screen \noverrepresented sequences against. The file \nmust contain sets of named contaminants in \nthe form name[tab]sequence. Lines prefixed \nwith a hash will be ignored. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/contaminant_list.txt\n" + + } + + + , + "adapters": { + "type": + "string", + "description": "Type: `file`. Specifies a non-default file which contains \nthe list of adapter sequences which will be \nexplicity searched against the library", + "help_text": "Type: `file`. Specifies a non-default file which contains \nthe list of adapter sequences which will be \nexplicity searched against the library. The \nfile must contain sets of named adapters in \nthe form name[tab]sequence. Lines prefixed \nwith a hash will be ignored. Default:\nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/adapter_list.txt\n" + + } + + + , + "limits": { + "type": + "string", + "description": "Type: `file`. Specifies a non-default file which contains \na set of criteria which will be used to \ndetermine the warn/error limits for the \nvarious modules", + "help_text": "Type: `file`. Specifies a non-default file which contains \na set of criteria which will be used to \ndetermine the warn/error limits for the \nvarious modules. This file can also be used \nto selectively remove some modules from the \noutput all together. The format needs to \nmirror the default limits.txt file found in \nthe Configuration folder. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/limits.txt\n" + + } + + + , + "subsample": { + "type": + "integer", + "description": "Type: `integer`, example: `10`. [Falco only] makes falco faster (but \npossibly less accurate) by only processing \nreads that are a multiple of this value (using \n0-based indexing to number reads)", + "help_text": "Type: `integer`, example: `10`. [Falco only] makes falco faster (but \npossibly less accurate) by only processing \nreads that are a multiple of this value (using \n0-based indexing to number reads).\n" + + } + + + , + "bisulfite": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. [Falco only] reads are whole genome \nbisulfite sequencing, and more Ts and fewer \nCs are therefore expected and will be \naccounted for in base content", + "help_text": "Type: `boolean_true`, default: `false`. [Falco only] reads are whole genome \nbisulfite sequencing, and more Ts and fewer \nCs are therefore expected and will be \naccounted for in base content.\n" + , + "default": "False" + } + + + , + "reverse_complliment": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. [Falco only] The input is a \nreverse-complement", + "help_text": "Type: `boolean_true`, default: `false`. [Falco only] The input is a \nreverse-complement. All modules will be \ntested by swapping A/T and C/G\n" + , + "default": "False" + } + + +} +}, + + + "output arguments" : { + "title": "Output arguments", + "type": "object", + "description": "No description", + "properties": { + + + "outdir": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.outdir.outdir`, example: `output`. Create all output files in the specified \noutput directory", + "help_text": "Type: `file`, required, default: `$id.$key.outdir.outdir`, example: `output`. Create all output files in the specified \noutput directory. FALCO-SPECIFIC: If the \ndirectory does not exists, the program will \ncreate it.\n" + , + "default": "$id.$key.outdir.outdir" + } + + + , + "format": { + "type": + "string", + "description": "Type: `string`, choices: ``bam`, `sam`, `bam_mapped`, `sam_mapped`, `fastq`, `fq`, `fastq.gz`, `fq.gz``. Bypasses the normal sequence file format \ndetection and forces the program to use the \nspecified format", + "help_text": "Type: `string`, choices: ``bam`, `sam`, `bam_mapped`, `sam_mapped`, `fastq`, `fq`, `fastq.gz`, `fq.gz``. Bypasses the normal sequence file format \ndetection and forces the program to use the \nspecified format. Validformats are bam, sam, \nbam_mapped, sam_mapped, fastq, fq, fastq.gz \nor fq.gz.\n", + "enum": ["bam", "sam", "bam_mapped", "sam_mapped", "fastq", "fq", "fastq.gz", "fq.gz"] + + + } + + + , + "data_filename": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.data_filename.data_filename`. [Falco only] Specify filename for FastQC \ndata output (TXT)", + "help_text": "Type: `file`, default: `$id.$key.data_filename.data_filename`. [Falco only] Specify filename for FastQC \ndata output (TXT). If not specified, it will \nbe called fastq_data.txt in either the input \nfile\u0027s directory or the one specified in the \n--output flag. Only available when running \nfalco with a single input.\n" + , + "default": "$id.$key.data_filename.data_filename" + } + + + , + "report_filename": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.report_filename.report_filename`. [Falco only] Specify filename for FastQC \nreport output (HTML)", + "help_text": "Type: `file`, default: `$id.$key.report_filename.report_filename`. [Falco only] Specify filename for FastQC \nreport output (HTML). If not specified, it \nwill be called fastq_report.html in either \nthe input file\u0027s directory or the one \nspecified in the --output flag. Only \navailable when running falco with a single \ninput.\n" + , + "default": "$id.$key.report_filename.report_filename" + } + + + , + "summary_filename": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.summary_filename.summary_filename`. [Falco only] Specify filename for the short \nsummary output (TXT)", + "help_text": "Type: `file`, default: `$id.$key.summary_filename.summary_filename`. [Falco only] Specify filename for the short \nsummary output (TXT). If not specified, it \nwill be called fastq_report.html in either \nthe input file\u0027s directory or the one \nspecified in the --output flag. Only \navailable when running falco with a single \ninput.\n" + , + "default": "$id.$key.summary_filename.summary_filename" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input arguments" + }, + + { + "$ref": "#/definitions/run arguments" + }, + + { + "$ref": "#/definitions/output arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/multiqc/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/multiqc/.config.vsh.yaml new file mode 100644 index 0000000..fcf4762 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/multiqc/.config.vsh.yaml @@ -0,0 +1,464 @@ +name: "multiqc" +version: "v0.1.0" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "File paths to be searched for analysis results to be included in\ + \ the report.\n" + info: null + example: + - "data/results" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Ouput" + arguments: + - type: "file" + name: "--output_report" + description: "Filepath of the generated report.\n" + info: null + example: + - "multiqc_report.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_data" + description: "Output directory for parsed data files. If not provided, parsed\ + \ data will not be published.\n" + info: null + example: + - "multiqc_data" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_plots" + description: "Output directory for generated plots. If not provided, plots will\ + \ not be published.\n" + info: null + example: + - "multiqc_plots" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Modules and analyses to run" + arguments: + - type: "string" + name: "--include_modules" + description: "Use only these module" + info: null + example: + - "fastqc,cutadapt" + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "string" + name: "--exclude_modules" + description: "Do not use only these modules" + info: null + example: + - "fastqc,cutadapt" + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "string" + name: "--ignore_analysis" + info: null + example: + - "run_one/*,run_two/*" + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "string" + name: "--ignore_samples" + info: null + example: + - "sample_1*,sample_3*" + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "boolean_true" + name: "--ignore_symlinks" + description: "Ignore symlinked directories and files" + info: null + direction: "input" +- name: "Sample name handling" + arguments: + - type: "boolean_true" + name: "--dirs" + description: "Prepend directory to sample names to avoid clashing filenames" + info: null + direction: "input" + - type: "integer" + name: "--dirs_depth" + description: "Prepend n directories to sample names. Negative number to take from\ + \ start of path." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--full_names" + description: "Do not clean the sample names (leave as full file name)" + info: null + direction: "input" + - type: "boolean_true" + name: "--fn_as_s_name" + description: "Use the log filename as the sample name" + info: null + direction: "input" + - type: "file" + name: "--replace_names" + description: "TSV file to rename sample names during report generation" + info: null + example: + - "replace_names.tsv" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Report Customisation" + arguments: + - type: "string" + name: "--title" + description: "Report title. Printed as page header, used for filename if not otherwise\ + \ specified.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--comment" + description: "Custom comment, will be printed at the top of the report.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--template" + description: "Report template to use.\n" + info: null + required: false + choices: + - "default" + - "gathered" + - "geo" + - "highcharts" + - "sections" + - "simple" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sample_names" + description: "TSV file containing alternative sample names for renaming buttons\ + \ in the report.\n" + info: null + example: + - "sample_names.tsv" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sample_filters" + description: "TSV file containing show/hide patterns for the report\n" + info: null + example: + - "sample_filters.tsv" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--custom_css_file" + description: "Custom CSS file to add to the final report\n" + info: null + example: + - "custom_style_sheet.css" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--profile_runtime" + description: "Add analysis of how long MultiQC takes to run to the report\n" + info: null + direction: "input" +- name: "MultiQC behaviour" + arguments: + - type: "boolean_true" + name: "--verbose" + description: "Increase output verbosity.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--quiet" + description: "Only show log warnings\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--strict" + description: "Don't catch exceptions, run additional code checks to help development.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--development" + description: "Development mode. Do not compress and minimise JS, export uncompressed\ + \ plot data.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--require_logs" + description: "Require all explicitly requested modules to have log files. If not,\ + \ MultiQC will exit with an error.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_megaqc_upload" + description: "Don't upload generated report to MegaQC, even if MegaQC options\ + \ are found.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_ansi" + description: "Disable coloured log output.\n" + info: null + direction: "input" + - type: "string" + name: "--cl_config" + description: "YAML formatted string that allows to customize MultiQC behaviour\ + \ like input file detection.\n" + info: null + example: + - "qualimap_config: { general_stats_coverage: [20,40,200] }" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output format" + arguments: + - type: "boolean_true" + name: "--flat" + description: "Use only flat plots (static images).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--interactive" + description: "Use only interactive plots (in-browser Javascript).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--data_dir" + description: "Force the parsed data directory to be created.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_data_dir" + description: "Prevent the parsed data directory from being created.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--zip_data_dir" + description: "Compress the data directory.\n" + info: null + direction: "input" + - type: "string" + name: "--data_format" + description: "Output parsed data in a different format than the default 'txt'.\n" + info: null + required: false + choices: + - "tsv" + - "csv" + - "json" + - "yaml" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--pdf" + description: "Creates PDF report with the 'simple' template. Requires Pandoc to\ + \ be installed.\n" + info: null + direction: "input" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "MultiQC aggregates results from bioinformatics analyses across many\ + \ samples into a single report.\nIt searches a given directory for analysis logs\ + \ and compiles a HTML report. It's a general use tool, perfect for summarising the\ + \ output from numerous bioinformatics tools.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: + keywords: + - "QC" + - "html report" + - "aggregate analysis" + links: + homepage: "https://multiqc.info/" + documentation: "https://multiqc.info/docs/" + repository: "https://github.com/MultiQC/MultiQC" + references: + doi: "10.1093/bioinformatics/btw354" + licence: "GPL v3 or later" +status: "enabled" +requirements: + commands: + - "ps" +license: "MIT" +links: + repository: "https://github.com/viash-hub/biobox" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0" + target_registry: "images.viash-hub.com" + target_tag: "v0.1.0" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "multiqc --version | sed 's/multiqc, version\\s\\(.*\\)/multiqc: \"\\1\"/' >\ + \ /var/software_versions.txt\n" + test_setup: + - type: "apt" + packages: + - "jq" + interactive: false + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/multiqc/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/multiqc" + executable: "target/nextflow/multiqc/main.nf" + viash_version: "0.9.0-RC6" + git_commit: "b84b29747d0635f2ac83ea63b496be9a9edb6724" + git_remote: "https://github.com/viash-hub/biobox" +package_config: + name: "biobox" + version: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null + viash_version: "0.9.0-RC6" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.1.0'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/multiqc/main.nf b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/multiqc/main.nf new file mode 100644 index 0000000..d7f771d --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/multiqc/main.nf @@ -0,0 +1,4016 @@ +// multiqc v0.1.0 +// +// This wrapper script is auto-generated by viash 0.9.0-RC6 and is thus a +// derivative work thereof. This software comes with ABSOLUTELY NO WARRANTY from +// Data Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value instanceof String) { + try { + value = value.toInteger() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigInteger) { + value = value.intValue() + } + expectedClass = value instanceof Integer ? null : "Integer" + } else if (par.type == "long") { + // cast to long if need be + if (value instanceof String) { + try { + value = value.toLong() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof Integer) { + value = value.toLong() + } + expectedClass = value instanceof Long ? null : "Long" + } else if (par.type == "double") { + // cast to double if need be + if (value instanceof String) { + try { + value = value.toDouble() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigDecimal) { + value = value.doubleValue() + } + if (value instanceof Float) { + value = value.toDouble() + } + expectedClass = value instanceof Double ? null : "Double" + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value instanceof String) { + def valueLower = value.toLowerCase() + if (valueLower == "true") { + value = true + } else if (valueLower == "false") { + value = false + } + } + expectedClass = value instanceof Boolean ? null : "Boolean" + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required) { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _processOutputValues(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(";") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey,newKey:oldKey'" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{[yamlFile] + outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ +mkdir -p "\$(dirname '${yamlFile}')" +echo "Storing state as yaml" +echo '${yamlBlob}' > '${yamlFile}' +echo "Copying output files to destination folder" +${copyCommands.join("\n ")} +""" +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (key, value) are the tuples that will be saved to the state.yaml file + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = val instanceof File ? val.toPath() : val + [value: value_, inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + def chModifiedFiltered = workflowArgs.filter ? + chModified | filter{workflowArgs.filter(it)} : + chModified + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModifiedFiltered.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModifiedFiltered + chPassthrough = Channel.empty() + } + + def chArgs = workflowArgs.fromState ? + chRun | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRun | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutput = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + // check output tuple + | map { id_, output_ -> + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _processOutputValues(output_, meta.config, id_, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { + output_ = output_.values()[0] + } + + [join_id, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chInitialOutput, chModifiedFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublish = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublish, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + + // remove join_id and meta + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "multiqc", + "version" : "v0.1.0", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "File paths to be searched for analysis results to be included in the report.\n", + "example" : [ + "data/results" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Ouput", + "arguments" : [ + { + "type" : "file", + "name" : "--output_report", + "description" : "Filepath of the generated report.\n", + "example" : [ + "multiqc_report.html" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_data", + "description" : "Output directory for parsed data files. If not provided, parsed data will not be published.\n", + "example" : [ + "multiqc_data" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_plots", + "description" : "Output directory for generated plots. If not provided, plots will not be published.\n", + "example" : [ + "multiqc_plots" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Modules and analyses to run", + "arguments" : [ + { + "type" : "string", + "name" : "--include_modules", + "description" : "Use only these module", + "example" : [ + "fastqc,cutadapt" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "string", + "name" : "--exclude_modules", + "description" : "Do not use only these modules", + "example" : [ + "fastqc,cutadapt" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "string", + "name" : "--ignore_analysis", + "example" : [ + "run_one/*,run_two/*" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "string", + "name" : "--ignore_samples", + "example" : [ + "sample_1*,sample_3*" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "boolean_true", + "name" : "--ignore_symlinks", + "description" : "Ignore symlinked directories and files", + "direction" : "input" + } + ] + }, + { + "name" : "Sample name handling", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--dirs", + "description" : "Prepend directory to sample names to avoid clashing filenames", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--dirs_depth", + "description" : "Prepend n directories to sample names. Negative number to take from start of path.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--full_names", + "description" : "Do not clean the sample names (leave as full file name)", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--fn_as_s_name", + "description" : "Use the log filename as the sample name", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--replace_names", + "description" : "TSV file to rename sample names during report generation", + "example" : [ + "replace_names.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Report Customisation", + "arguments" : [ + { + "type" : "string", + "name" : "--title", + "description" : "Report title. Printed as page header, used for filename if not otherwise specified.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--comment", + "description" : "Custom comment, will be printed at the top of the report.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--template", + "description" : "Report template to use.\n", + "required" : false, + "choices" : [ + "default", + "gathered", + "geo", + "highcharts", + "sections", + "simple" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sample_names", + "description" : "TSV file containing alternative sample names for renaming buttons in the report.\n", + "example" : [ + "sample_names.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sample_filters", + "description" : "TSV file containing show/hide patterns for the report\n", + "example" : [ + "sample_filters.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--custom_css_file", + "description" : "Custom CSS file to add to the final report\n", + "example" : [ + "custom_style_sheet.css" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--profile_runtime", + "description" : "Add analysis of how long MultiQC takes to run to the report\n", + "direction" : "input" + } + ] + }, + { + "name" : "MultiQC behaviour", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--verbose", + "description" : "Increase output verbosity.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--quiet", + "description" : "Only show log warnings\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--strict", + "description" : "Don't catch exceptions, run additional code checks to help development.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--development", + "description" : "Development mode. Do not compress and minimise JS, export uncompressed plot data.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--require_logs", + "description" : "Require all explicitly requested modules to have log files. If not, MultiQC will exit with an error.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_megaqc_upload", + "description" : "Don't upload generated report to MegaQC, even if MegaQC options are found.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_ansi", + "description" : "Disable coloured log output.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--cl_config", + "description" : "YAML formatted string that allows to customize MultiQC behaviour like input file detection.\n", + "example" : [ + "qualimap_config: { general_stats_coverage: [20,40,200] }" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output format", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--flat", + "description" : "Use only flat plots (static images).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--interactive", + "description" : "Use only interactive plots (in-browser Javascript).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--data_dir", + "description" : "Force the parsed data directory to be created.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_data_dir", + "description" : "Prevent the parsed data directory from being created.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--zip_data_dir", + "description" : "Compress the data directory.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--data_format", + "description" : "Output parsed data in a different format than the default 'txt'.\n", + "required" : false, + "choices" : [ + "tsv", + "csv", + "json", + "yaml" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--pdf", + "description" : "Creates PDF report with the 'simple' template. Requires Pandoc to be installed.\n", + "direction" : "input" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. It's a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "info" : { + "keywords" : [ + "QC", + "html report", + "aggregate analysis" + ], + "links" : { + "homepage" : "https://multiqc.info/", + "documentation" : "https://multiqc.info/docs/", + "repository" : "https://github.com/MultiQC/MultiQC" + }, + "references" : { + "doi" : "10.1093/bioinformatics/btw354" + }, + "licence" : "GPL v3 or later" + }, + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "license" : "MIT", + "links" : { + "repository" : "https://github.com/viash-hub/biobox" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.1.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "multiqc --version | sed 's/multiqc, version\\\\s\\\\(.*\\\\)/multiqc: \\"\\\\1\\"/' > /var/software_versions.txt\n" + ] + } + ], + "test_setup" : [ + { + "type" : "apt", + "packages" : [ + "jq" + ], + "interactive" : false + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/multiqc/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/multiqc", + "viash_version" : "0.9.0-RC6", + "git_commit" : "b84b29747d0635f2ac83ea63b496be9a9edb6724", + "git_remote" : "https://github.com/viash-hub/biobox" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.1.0", + "description" : "A collection of bioinformatics tools for working with sequence data.\n", + "viash_version" : "0.9.0-RC6", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.1.0'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_REPORT+x} ]; then echo "${VIASH_PAR_OUTPUT_REPORT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_report='&'#" ; else echo "# par_output_report="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DATA+x} ]; then echo "${VIASH_PAR_OUTPUT_DATA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_data='&'#" ; else echo "# par_output_data="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_PLOTS+x} ]; then echo "${VIASH_PAR_OUTPUT_PLOTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_plots='&'#" ; else echo "# par_output_plots="; fi ) +$( if [ ! -z ${VIASH_PAR_INCLUDE_MODULES+x} ]; then echo "${VIASH_PAR_INCLUDE_MODULES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_include_modules='&'#" ; else echo "# par_include_modules="; fi ) +$( if [ ! -z ${VIASH_PAR_EXCLUDE_MODULES+x} ]; then echo "${VIASH_PAR_EXCLUDE_MODULES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_exclude_modules='&'#" ; else echo "# par_exclude_modules="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_ANALYSIS+x} ]; then echo "${VIASH_PAR_IGNORE_ANALYSIS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_analysis='&'#" ; else echo "# par_ignore_analysis="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_SAMPLES+x} ]; then echo "${VIASH_PAR_IGNORE_SAMPLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_samples='&'#" ; else echo "# par_ignore_samples="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_SYMLINKS+x} ]; then echo "${VIASH_PAR_IGNORE_SYMLINKS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_symlinks='&'#" ; else echo "# par_ignore_symlinks="; fi ) +$( if [ ! -z ${VIASH_PAR_DIRS+x} ]; then echo "${VIASH_PAR_DIRS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dirs='&'#" ; else echo "# par_dirs="; fi ) +$( if [ ! -z ${VIASH_PAR_DIRS_DEPTH+x} ]; then echo "${VIASH_PAR_DIRS_DEPTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dirs_depth='&'#" ; else echo "# par_dirs_depth="; fi ) +$( if [ ! -z ${VIASH_PAR_FULL_NAMES+x} ]; then echo "${VIASH_PAR_FULL_NAMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_full_names='&'#" ; else echo "# par_full_names="; fi ) +$( if [ ! -z ${VIASH_PAR_FN_AS_S_NAME+x} ]; then echo "${VIASH_PAR_FN_AS_S_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fn_as_s_name='&'#" ; else echo "# par_fn_as_s_name="; fi ) +$( if [ ! -z ${VIASH_PAR_REPLACE_NAMES+x} ]; then echo "${VIASH_PAR_REPLACE_NAMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_replace_names='&'#" ; else echo "# par_replace_names="; fi ) +$( if [ ! -z ${VIASH_PAR_TITLE+x} ]; then echo "${VIASH_PAR_TITLE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_title='&'#" ; else echo "# par_title="; fi ) +$( if [ ! -z ${VIASH_PAR_COMMENT+x} ]; then echo "${VIASH_PAR_COMMENT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_comment='&'#" ; else echo "# par_comment="; fi ) +$( if [ ! -z ${VIASH_PAR_TEMPLATE+x} ]; then echo "${VIASH_PAR_TEMPLATE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_template='&'#" ; else echo "# par_template="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_NAMES+x} ]; then echo "${VIASH_PAR_SAMPLE_NAMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_names='&'#" ; else echo "# par_sample_names="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_FILTERS+x} ]; then echo "${VIASH_PAR_SAMPLE_FILTERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_filters='&'#" ; else echo "# par_sample_filters="; fi ) +$( if [ ! -z ${VIASH_PAR_CUSTOM_CSS_FILE+x} ]; then echo "${VIASH_PAR_CUSTOM_CSS_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_custom_css_file='&'#" ; else echo "# par_custom_css_file="; fi ) +$( if [ ! -z ${VIASH_PAR_PROFILE_RUNTIME+x} ]; then echo "${VIASH_PAR_PROFILE_RUNTIME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_profile_runtime='&'#" ; else echo "# par_profile_runtime="; fi ) +$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) +$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) +$( if [ ! -z ${VIASH_PAR_STRICT+x} ]; then echo "${VIASH_PAR_STRICT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strict='&'#" ; else echo "# par_strict="; fi ) +$( if [ ! -z ${VIASH_PAR_DEVELOPMENT+x} ]; then echo "${VIASH_PAR_DEVELOPMENT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_development='&'#" ; else echo "# par_development="; fi ) +$( if [ ! -z ${VIASH_PAR_REQUIRE_LOGS+x} ]; then echo "${VIASH_PAR_REQUIRE_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_require_logs='&'#" ; else echo "# par_require_logs="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_MEGAQC_UPLOAD+x} ]; then echo "${VIASH_PAR_NO_MEGAQC_UPLOAD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_megaqc_upload='&'#" ; else echo "# par_no_megaqc_upload="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_ANSI+x} ]; then echo "${VIASH_PAR_NO_ANSI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_ansi='&'#" ; else echo "# par_no_ansi="; fi ) +$( if [ ! -z ${VIASH_PAR_CL_CONFIG+x} ]; then echo "${VIASH_PAR_CL_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cl_config='&'#" ; else echo "# par_cl_config="; fi ) +$( if [ ! -z ${VIASH_PAR_FLAT+x} ]; then echo "${VIASH_PAR_FLAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_flat='&'#" ; else echo "# par_flat="; fi ) +$( if [ ! -z ${VIASH_PAR_INTERACTIVE+x} ]; then echo "${VIASH_PAR_INTERACTIVE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_interactive='&'#" ; else echo "# par_interactive="; fi ) +$( if [ ! -z ${VIASH_PAR_DATA_DIR+x} ]; then echo "${VIASH_PAR_DATA_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_data_dir='&'#" ; else echo "# par_data_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_DATA_DIR+x} ]; then echo "${VIASH_PAR_NO_DATA_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_data_dir='&'#" ; else echo "# par_no_data_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_ZIP_DATA_DIR+x} ]; then echo "${VIASH_PAR_ZIP_DATA_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_zip_data_dir='&'#" ; else echo "# par_zip_data_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_DATA_FORMAT+x} ]; then echo "${VIASH_PAR_DATA_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_data_format='&'#" ; else echo "# par_data_format="; fi ) +$( if [ ! -z ${VIASH_PAR_PDF+x} ]; then echo "${VIASH_PAR_PDF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pdf='&'#" ; else echo "# par_pdf="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +# disable flags +[[ "\\$par_ignore_symlinks" == "false" ]] && unset par_ignore_symlinks +[[ "\\$par_dirs" == "false" ]] && unset par_dirs +[[ "\\$par_full_names" == "false" ]] && unset par_full_names +[[ "\\$par_fn_as_s_name" == "false" ]] && unset par_fn_as_s_name +[[ "\\$par_profile_runtime" == "false" ]] && unset par_profile_runtime +[[ "\\$par_verbose" == "false" ]] && unset par_verbose +[[ "\\$par_quiet" == "false" ]] && unset par_quiet +[[ "\\$par_strict" == "false" ]] && unset par_strict +[[ "\\$par_development" == "false" ]] && unset par_development +[[ "\\$par_require_logs" == "false" ]] && unset par_require_logs +[[ "\\$par_no_megaqc_upload" == "false" ]] && unset par_no_megaqc_upload +[[ "\\$par_no_ansi" == "false" ]] && unset par_no_ansi +[[ "\\$par_flat" == "false" ]] && unset par_flat +[[ "\\$par_interactive" == "false" ]] && unset par_interactive +[[ "\\$par_static_plot_export" == "false" ]] && unset par_static_plot_export +[[ "\\$par_data_dir" == "false" ]] && unset par_data_dir +[[ "\\$par_no_data_dir" == "false" ]] && unset par_no_data_dir +[[ "\\$par_zip_data_dir" == "false" ]] && unset par_zip_data_dir +[[ "\\$par_pdf" == "false" ]] && unset par_pdf + + +# handle inputs +out_dir=\\$(dirname "\\$par_output_report") +output_report_file=\\$(basename "\\$par_output_report") +report_name="\\${output_report_file%.*}" + +# handle outputs +[[ -z "\\$par_output_report" ]] && no_report=true +[[ -z "\\$par_output_data" ]] && no_data_dir=true +[[ ! -z "\\$par_output_data" ]] && data_dir=true +[[ ! -z "\\$par_output_plots" ]] && export=true + +# handle multiples +IFS=";" read -ra inputs <<< \\$par_input + +if [[ -n "\\$par_include_modules" ]]; then + include_modules="" + IFS="," read -ra incl_modules <<< \\$par_include_modules + for i in "\\${incl_modules[@]}"; do + include_modules+="--include \\$i " + done + unset IFS +fi + +if [[ -n "\\$par_exclude_modules" ]]; then + exclude_modules="" + IFS="," read -ra excl_modules <<< \\$par_exclude_modules + for i in "\\${excl_modules[@]}"; do + exclude_modules+="--exclude \\$i" + done + unset IFS +fi + +if [[ -n "\\$par_ignore_analysis" ]]; then + ignore="" + IFS="," read -ra ignore_analysis <<< \\$par_ignore_analysis + for i in "\\${ignore_analysis[@]}"; do + ignore+="--ignore \\$i " + done + unset IFS +fi + +if [[ -n "\\$par_ignore_samples" ]]; then + ignore_samples="" + IFS="," read -ra ign_samples <<< \\$par_ignore_samples + for i in "\\${ign_samples[@]}"; do + ignore_samples+="--ignore-samples \\$i" + done + unset IFS +fi + +# run multiqc +multiqc \\\\ + \\${par_output_report:+--filename "\\$report_name"} \\\\ + \\${out_dir:+--outdir "\\$out_dir"} \\\\ + \\${no_report:+--no-report} \\\\ + \\${no_data_dir:+--no-data-dir} \\\\ + \\${data_dir:+--data-dir} \\\\ + \\${export:+--export} \\\\ + \\${par_title:+--title "\\$par_title"} \\\\ + \\${par_comment:+--comment "\\$par_comment"} \\\\ + \\${par_template:+--template "\\$par_template"} \\\\ + \\${par_sample_names:+--sample-names "\\$par_sample_names"} \\\\ + \\${par_sample_filters:+--sample-filters "\\$par_sample_filters"} \\\\ + \\${par_custom_css_file:+--custom-css-file "\\$par_custom_css_file"} \\\\ + \\${par_profile_runtime:+--profile-runtime} \\\\ + \\${par_dirs:+--dirs} \\\\ + \\${par_dirs_depth:+--dirs-depth "\\$par_dirs_depth"} \\\\ + \\${par_full_names:+--full-names} \\\\ + \\${par_fn_as_s_name:+--fn-as-s-name} \\\\ + \\${par_ignore_names:+--ignore-names "\\$par_ignore_names"} \\\\ + \\${par_ignore_symlinks:+--ignore-symlinks} \\\\ + \\${ignore_samples} \\\\ + \\${ignore} \\\\ + \\${exclude_modules} \\\\ + \\${include_modules} \\\\ + \\${par_include_modules:+--include-modules "\\$par_include_modules"} \\\\ + \\${par_data_format:+--data-format "\\$par_data_format"} \\\\ + \\${par_cl_config:+--cl-config "\\$par_cl_config"} \\\\ + \\${par_zip_data_dir:+--zip-data-dir} \\\\ + \\${par_pdf:+--pdf} \\\\ + \\${par_interactive:+--interactive} \\\\ + \\${par_flat:+--flat} \\\\ + \\${par_verbose:+--verbose} \\\\ + \\${par_quiet:+--quiet} \\\\ + \\${par_strict:+--strict} \\\\ + \\${par_no_megaqc_upload:+--no-megaqc-upload} \\\\ + \\${par_no_ansi:+--no-ansi} \\\\ + \\${par_profile_runtime:+--profile-runtime} \\\\ + \\${par_require_logs:+--require-logs} \\\\ + \\${par_development:+--development} \\\\ + --force \\\\ + "\\${inputs[@]}" + +# Move outputs + +if [[ -n "\\$par_output_data" ]] && [[ -d "\\${out_dir}/\\${report_name}_data" ]]; then + mv "\\${out_dir}/\\${report_name}_data" "\\$par_output_data" +elif [[ -n "\\$par_output_data" ]] && [[ ! -d "\\${out_dir}/\\${report_name}_data" ]]; then + echo "WARNING: Data could not be saved because data folder was not generated by multiqc. This could be due to filtering out of modules or samples." +fi + +if [[ -n "\\$par_output_plots" ]] && [[ -d "\\${out_dir}/\\${report_name}_plots" ]]; then + mv "\\${out_dir}/\\${report_name}_plots" "\\$par_output_plots" +elif [[ -n "\\$par_output_plots" ]] && [[ ! -d "\\${out_dir}/\\${report_name}_plots" ]]; then + echo "WARNING: Plots could not be saved because plots folder was not generated by multiqc. This could be due to filtering out of modules or samples." +fi +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def viash_par_contents = "(viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName})" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = new nextflow.script.ScriptParser(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/multiqc", + "tag" : "v0.1.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/multiqc/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/multiqc/nextflow.config new file mode 100644 index 0000000..2443ee9 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/multiqc/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'multiqc' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.1.0' + description = 'MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. It\'s a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/multiqc/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/multiqc/nextflow_schema.json new file mode 100644 index 0000000..daab1d2 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/multiqc/nextflow_schema.json @@ -0,0 +1,529 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "multiqc", +"description": "MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. It\u0027s a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `data/results/`, multiple_sep: `\":\"`. File paths to be searched for analysis results to be included in the report", + "help_text": "Type: List of `file`, required, example: `data/results/`, multiple_sep: `\":\"`. File paths to be searched for analysis results to be included in the report.\n" + + } + + +} +}, + + + "ouput" : { + "title": "Ouput", + "type": "object", + "description": "No description", + "properties": { + + + "output_report": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_report.html`, example: `multiqc_report.html`. Filepath of the generated report", + "help_text": "Type: `file`, default: `$id.$key.output_report.html`, example: `multiqc_report.html`. Filepath of the generated report.\n" + , + "default": "$id.$key.output_report.html" + } + + + , + "output_data": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_data.output_data`, example: `multiqc_data`. Output directory for parsed data files", + "help_text": "Type: `file`, default: `$id.$key.output_data.output_data`, example: `multiqc_data`. Output directory for parsed data files. If not provided, parsed data will not be published.\n" + , + "default": "$id.$key.output_data.output_data" + } + + + , + "output_plots": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_plots.output_plots`, example: `multiqc_plots`. Output directory for generated plots", + "help_text": "Type: `file`, default: `$id.$key.output_plots.output_plots`, example: `multiqc_plots`. Output directory for generated plots. If not provided, plots will not be published.\n" + , + "default": "$id.$key.output_plots.output_plots" + } + + +} +}, + + + "modules and analyses to run" : { + "title": "Modules and analyses to run", + "type": "object", + "description": "No description", + "properties": { + + + "include_modules": { + "type": + "string", + "description": "Type: List of `string`, example: `fastqc,cutadapt`, multiple_sep: `\",\"`. Use only these module", + "help_text": "Type: List of `string`, example: `fastqc,cutadapt`, multiple_sep: `\",\"`. Use only these module" + + } + + + , + "exclude_modules": { + "type": + "string", + "description": "Type: List of `string`, example: `fastqc,cutadapt`, multiple_sep: `\",\"`. Do not use only these modules", + "help_text": "Type: List of `string`, example: `fastqc,cutadapt`, multiple_sep: `\",\"`. Do not use only these modules" + + } + + + , + "ignore_analysis": { + "type": + "string", + "description": "Type: List of `string`, example: `run_one/*,run_two/*`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `string`, example: `run_one/*,run_two/*`, multiple_sep: `\",\"`. " + + } + + + , + "ignore_samples": { + "type": + "string", + "description": "Type: List of `string`, example: `sample_1*,sample_3*`, multiple_sep: `\",\"`. ", + "help_text": "Type: List of `string`, example: `sample_1*,sample_3*`, multiple_sep: `\",\"`. " + + } + + + , + "ignore_symlinks": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Ignore symlinked directories and files", + "help_text": "Type: `boolean_true`, default: `false`. Ignore symlinked directories and files" + , + "default": "False" + } + + +} +}, + + + "sample name handling" : { + "title": "Sample name handling", + "type": "object", + "description": "No description", + "properties": { + + + "dirs": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Prepend directory to sample names to avoid clashing filenames", + "help_text": "Type: `boolean_true`, default: `false`. Prepend directory to sample names to avoid clashing filenames" + , + "default": "False" + } + + + , + "dirs_depth": { + "type": + "integer", + "description": "Type: `integer`. Prepend n directories to sample names", + "help_text": "Type: `integer`. Prepend n directories to sample names. Negative number to take from start of path." + + } + + + , + "full_names": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Do not clean the sample names (leave as full file name)", + "help_text": "Type: `boolean_true`, default: `false`. Do not clean the sample names (leave as full file name)" + , + "default": "False" + } + + + , + "fn_as_s_name": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use the log filename as the sample name", + "help_text": "Type: `boolean_true`, default: `false`. Use the log filename as the sample name" + , + "default": "False" + } + + + , + "replace_names": { + "type": + "string", + "description": "Type: `file`, example: `replace_names.tsv`. TSV file to rename sample names during report generation", + "help_text": "Type: `file`, example: `replace_names.tsv`. TSV file to rename sample names during report generation" + + } + + +} +}, + + + "report customisation" : { + "title": "Report Customisation", + "type": "object", + "description": "No description", + "properties": { + + + "title": { + "type": + "string", + "description": "Type: `string`. Report title", + "help_text": "Type: `string`. Report title. Printed as page header, used for filename if not otherwise specified.\n" + + } + + + , + "comment": { + "type": + "string", + "description": "Type: `string`. Custom comment, will be printed at the top of the report", + "help_text": "Type: `string`. Custom comment, will be printed at the top of the report.\n" + + } + + + , + "template": { + "type": + "string", + "description": "Type: `string`, choices: ``default`, `gathered`, `geo`, `highcharts`, `sections`, `simple``. Report template to use", + "help_text": "Type: `string`, choices: ``default`, `gathered`, `geo`, `highcharts`, `sections`, `simple``. Report template to use.\n", + "enum": ["default", "gathered", "geo", "highcharts", "sections", "simple"] + + + } + + + , + "sample_names": { + "type": + "string", + "description": "Type: `file`, example: `sample_names.tsv`. TSV file containing alternative sample names for renaming buttons in the report", + "help_text": "Type: `file`, example: `sample_names.tsv`. TSV file containing alternative sample names for renaming buttons in the report.\n" + + } + + + , + "sample_filters": { + "type": + "string", + "description": "Type: `file`, example: `sample_filters.tsv`. TSV file containing show/hide patterns for the report\n", + "help_text": "Type: `file`, example: `sample_filters.tsv`. TSV file containing show/hide patterns for the report\n" + + } + + + , + "custom_css_file": { + "type": + "string", + "description": "Type: `file`, example: `custom_style_sheet.css`. Custom CSS file to add to the final report\n", + "help_text": "Type: `file`, example: `custom_style_sheet.css`. Custom CSS file to add to the final report\n" + + } + + + , + "profile_runtime": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Add analysis of how long MultiQC takes to run to the report\n", + "help_text": "Type: `boolean_true`, default: `false`. Add analysis of how long MultiQC takes to run to the report\n" + , + "default": "False" + } + + +} +}, + + + "multiqc behaviour" : { + "title": "MultiQC behaviour", + "type": "object", + "description": "No description", + "properties": { + + + "verbose": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Increase output verbosity", + "help_text": "Type: `boolean_true`, default: `false`. Increase output verbosity.\n" + , + "default": "False" + } + + + , + "quiet": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Only show log warnings\n", + "help_text": "Type: `boolean_true`, default: `false`. Only show log warnings\n" + , + "default": "False" + } + + + , + "strict": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Don\u0027t catch exceptions, run additional code checks to help development", + "help_text": "Type: `boolean_true`, default: `false`. Don\u0027t catch exceptions, run additional code checks to help development.\n" + , + "default": "False" + } + + + , + "development": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Development mode", + "help_text": "Type: `boolean_true`, default: `false`. Development mode. Do not compress and minimise JS, export uncompressed plot data.\n" + , + "default": "False" + } + + + , + "require_logs": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Require all explicitly requested modules to have log files", + "help_text": "Type: `boolean_true`, default: `false`. Require all explicitly requested modules to have log files. If not, MultiQC will exit with an error.\n" + , + "default": "False" + } + + + , + "no_megaqc_upload": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Don\u0027t upload generated report to MegaQC, even if MegaQC options are found", + "help_text": "Type: `boolean_true`, default: `false`. Don\u0027t upload generated report to MegaQC, even if MegaQC options are found.\n" + , + "default": "False" + } + + + , + "no_ansi": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Disable coloured log output", + "help_text": "Type: `boolean_true`, default: `false`. Disable coloured log output.\n" + , + "default": "False" + } + + + , + "cl_config": { + "type": + "string", + "description": "Type: `string`, example: `qualimap_config: { general_stats_coverage: [20,40,200] }`. YAML formatted string that allows to customize MultiQC behaviour like input file detection", + "help_text": "Type: `string`, example: `qualimap_config: { general_stats_coverage: [20,40,200] }`. YAML formatted string that allows to customize MultiQC behaviour like input file detection.\n" + + } + + +} +}, + + + "output format" : { + "title": "Output format", + "type": "object", + "description": "No description", + "properties": { + + + "flat": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use only flat plots (static images)", + "help_text": "Type: `boolean_true`, default: `false`. Use only flat plots (static images).\n" + , + "default": "False" + } + + + , + "interactive": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use only interactive plots (in-browser Javascript)", + "help_text": "Type: `boolean_true`, default: `false`. Use only interactive plots (in-browser Javascript).\n" + , + "default": "False" + } + + + , + "data_dir": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Force the parsed data directory to be created", + "help_text": "Type: `boolean_true`, default: `false`. Force the parsed data directory to be created.\n" + , + "default": "False" + } + + + , + "no_data_dir": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Prevent the parsed data directory from being created", + "help_text": "Type: `boolean_true`, default: `false`. Prevent the parsed data directory from being created.\n" + , + "default": "False" + } + + + , + "zip_data_dir": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Compress the data directory", + "help_text": "Type: `boolean_true`, default: `false`. Compress the data directory.\n" + , + "default": "False" + } + + + , + "data_format": { + "type": + "string", + "description": "Type: `string`, choices: ``tsv`, `csv`, `json`, `yaml``. Output parsed data in a different format than the default \u0027txt\u0027", + "help_text": "Type: `string`, choices: ``tsv`, `csv`, `json`, `yaml``. Output parsed data in a different format than the default \u0027txt\u0027.\n", + "enum": ["tsv", "csv", "json", "yaml"] + + + } + + + , + "pdf": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Creates PDF report with the \u0027simple\u0027 template", + "help_text": "Type: `boolean_true`, default: `false`. Creates PDF report with the \u0027simple\u0027 template. Requires Pandoc to be installed.\n" + , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/ouput" + }, + + { + "$ref": "#/definitions/modules and analyses to run" + }, + + { + "$ref": "#/definitions/sample name handling" + }, + + { + "$ref": "#/definitions/report customisation" + }, + + { + "$ref": "#/definitions/multiqc behaviour" + }, + + { + "$ref": "#/definitions/output format" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/pear/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/pear/.config.vsh.yaml new file mode 100644 index 0000000..1acbafd --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/pear/.config.vsh.yaml @@ -0,0 +1,406 @@ +name: "pear" +version: "v0.1.0" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--forward_fastq" + alternatives: + - "-f" + description: "Forward paired-end FASTQ file" + info: null + example: + - "forward.fastq" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--reverse_fastq" + alternatives: + - "-r" + description: "Reverse paired-end FASTQ file" + info: null + example: + - "reverse.fastq" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--assembled" + description: "The output file containing assembled reads. Can be compressed with\ + \ gzip." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--unassembled_forward" + description: "The output file containing forward reads that could not be assembled.\ + \ Can be compressed with gzip." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--unassembled_reverse" + description: "The output file containing reverse reads that could not be assembled.\ + \ Can be compressed with gzip." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--discarded" + description: "The output file containing reads that were discarded due to too\ + \ low quality or too many uncalled bases. Can be compressed with gzip." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Arguments" + arguments: + - type: "double" + name: "--p_value" + alternatives: + - "-p" + description: "Specify a p-value for the statistical test. If the computed p-value\ + \ of a possible assembly exceeds the specified p-value then paired-end read\ + \ will not be assembled. Valid options are: 0.0001, 0.001, 0.01, 0.05 and 1.0.\ + \ Setting 1.0 disables the test.\n" + info: null + example: + - 0.01 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_overlap" + alternatives: + - "-v" + description: "Specify the minimum overlap size. The minimum overlap may be set\ + \ to 1 when the statistical test is used. However, further restricting the minimum\ + \ overlap size to a proper value may reduce false-positive assembles.\n" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--max_assembly_length" + alternatives: + - "-m" + description: "Specify the maximum possible length of the assembled sequences.\ + \ Setting this value to 0 disables the restriction and assembled sequences may\ + \ be arbitrary long.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_assembly_length" + alternatives: + - "-n" + description: "Specify the minimum possible length of the assembled sequences.\ + \ Setting this value to 0 disables the restriction and assembled sequences may\ + \ be arbitrary short.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_trim_length" + alternatives: + - "-t" + description: "Specify the minimum length of reads after trimming the low quality\ + \ part (see option -q)\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--quality_threshold" + alternatives: + - "-q" + description: "Specify the quality threshold for trimming the low quality part\ + \ of a read. If the quality scores of two consecutive bases are strictly less\ + \ than the specified threshold, the rest of the read will be trimmed.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--max_uncalled_base" + alternatives: + - "-u" + description: "Specify the maximal proportion of uncalled bases in a read. Setting\ + \ this value to 0 will cause PEAR to discard all reads containing uncalled bases.\ + \ The other extreme setting is 1 which causes PEAR to process all reads independent\ + \ on the number of uncalled bases.\n" + info: null + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--test_method" + alternatives: + - "-g" + description: "Specify the type of statistical test. Two options are available.\ + \ 1: Given the minimum allowed overlap, test using the highest OES. Note that\ + \ due to its discrete nature, this test usually yields a lower p-value for the\ + \ assembled read than the cut- off (specified by -p). For example, setting the\ + \ cut-off to 0.05 using this test, the assembled reads might have an actual\ + \ p-value of 0.02.\n2. Use the acceptance probability (m.a.p). This test methods\ + \ computes the same probability as test method 1. However, it assumes that the\ + \ minimal overlap is the observed overlap with the highest OES, instead of the\ + \ one specified by -v. Therefore, this is not a valid statistical test and the\ + \ 'p-value' is in fact the maximal probability for accepting the assembly. Nevertheless,\ + \ we observed in practice that for the case the actual overlap sizes are relatively\ + \ small, test 2 can correctly assemble more reads with only slightly higher\ + \ false-positive rate.\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--emperical_freqs" + alternatives: + - "-e" + description: "Disable empirical base frequencies.\n" + info: null + direction: "input" + - type: "integer" + name: "--score_method" + alternatives: + - "-s" + description: "Specify the scoring method. 1. OES with +1 for match and -1 for\ + \ mismatch. 2: Assembly score (AS). Use +1 for match and -1 for mismatch multiplied\ + \ by base quality scores. 3: Ignore quality scores and use +1 for a match and\ + \ -1 for a mismatch.\n" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--phred_base" + alternatives: + - "-b" + description: "Base PHRED quality score.\n" + info: null + example: + - 33 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--cap" + alternatives: + - "-c" + description: "Specify the upper bound for the resulting quality score. If set\ + \ to zero, capping is disabled.\n" + info: null + example: + - 40 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--nbase" + alternatives: + - "-z" + description: "When merging a base-pair that consists of two non-equal bases out\ + \ of which none is degenerate, set the merged base to N and use the highest\ + \ quality score of the two bases\n" + info: null + direction: "input" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "PEAR is an ultrafast, memory-efficient and highly accurate pair-end\ + \ read merger. It is fully parallelized and can run with as low as just a few kilobytes\ + \ of memory.\n\nPEAR evaluates all possible paired-end read overlaps and without\ + \ requiring the target fragment size as input. In addition, it implements a statistical\ + \ test for minimizing false-positive results. Together with a highly optimized implementation,\ + \ it can merge millions of paired end reads within a couple of minutes on a standard\ + \ desktop computer.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +requirements: + commands: + - "ps" +keywords: +- "pair-end" +- "read" +- "merge" +license: "CC-BY-NC-SA-3.0" +references: + doi: + - "10.1093/bioinformatics/btt593" +links: + repository: "https://github.com/tseemann/PEAR" + homepage: "https://cme.h-its.org/exelixis/web/software/pear" + documentation: "https://cme.h-its.org/exelixis/web/software/pear/doc.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/pear:0.9.6--h9d449c0_10" + target_registry: "images.viash-hub.com" + target_tag: "v0.1.0" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "version=$(pear -h | grep 'PEAR v' | sed 's/PEAR v//' | sed 's/ .*//') && \\\ + \necho \"pear: $version\" > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/pear/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/pear" + executable: "target/nextflow/pear/main.nf" + viash_version: "0.9.0-RC6" + git_commit: "b84b29747d0635f2ac83ea63b496be9a9edb6724" + git_remote: "https://github.com/viash-hub/biobox" +package_config: + name: "biobox" + version: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null + viash_version: "0.9.0-RC6" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.1.0'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/pear/main.nf b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/pear/main.nf new file mode 100644 index 0000000..af0476c --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/pear/main.nf @@ -0,0 +1,3821 @@ +// pear v0.1.0 +// +// This wrapper script is auto-generated by viash 0.9.0-RC6 and is thus a +// derivative work thereof. This software comes with ABSOLUTELY NO WARRANTY from +// Data Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value instanceof String) { + try { + value = value.toInteger() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigInteger) { + value = value.intValue() + } + expectedClass = value instanceof Integer ? null : "Integer" + } else if (par.type == "long") { + // cast to long if need be + if (value instanceof String) { + try { + value = value.toLong() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof Integer) { + value = value.toLong() + } + expectedClass = value instanceof Long ? null : "Long" + } else if (par.type == "double") { + // cast to double if need be + if (value instanceof String) { + try { + value = value.toDouble() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigDecimal) { + value = value.doubleValue() + } + if (value instanceof Float) { + value = value.toDouble() + } + expectedClass = value instanceof Double ? null : "Double" + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value instanceof String) { + def valueLower = value.toLowerCase() + if (valueLower == "true") { + value = true + } else if (valueLower == "false") { + value = false + } + } + expectedClass = value instanceof Boolean ? null : "Boolean" + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required) { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _processOutputValues(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(";") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey,newKey:oldKey'" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{[yamlFile] + outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ +mkdir -p "\$(dirname '${yamlFile}')" +echo "Storing state as yaml" +echo '${yamlBlob}' > '${yamlFile}' +echo "Copying output files to destination folder" +${copyCommands.join("\n ")} +""" +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (key, value) are the tuples that will be saved to the state.yaml file + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = val instanceof File ? val.toPath() : val + [value: value_, inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + def chModifiedFiltered = workflowArgs.filter ? + chModified | filter{workflowArgs.filter(it)} : + chModified + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModifiedFiltered.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModifiedFiltered + chPassthrough = Channel.empty() + } + + def chArgs = workflowArgs.fromState ? + chRun | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRun | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutput = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + // check output tuple + | map { id_, output_ -> + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _processOutputValues(output_, meta.config, id_, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { + output_ = output_.values()[0] + } + + [join_id, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chInitialOutput, chModifiedFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublish = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublish, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + + // remove join_id and meta + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "pear", + "version" : "v0.1.0", + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--forward_fastq", + "alternatives" : [ + "-f" + ], + "description" : "Forward paired-end FASTQ file", + "example" : [ + "forward.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--reverse_fastq", + "alternatives" : [ + "-r" + ], + "description" : "Reverse paired-end FASTQ file", + "example" : [ + "reverse.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--assembled", + "description" : "The output file containing assembled reads. Can be compressed with gzip.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--unassembled_forward", + "description" : "The output file containing forward reads that could not be assembled. Can be compressed with gzip.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--unassembled_reverse", + "description" : "The output file containing reverse reads that could not be assembled. Can be compressed with gzip.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--discarded", + "description" : "The output file containing reads that were discarded due to too low quality or too many uncalled bases. Can be compressed with gzip.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Arguments", + "arguments" : [ + { + "type" : "double", + "name" : "--p_value", + "alternatives" : [ + "-p" + ], + "description" : "Specify a p-value for the statistical test. If the computed p-value of a possible assembly exceeds the specified p-value then paired-end read will not be assembled. Valid options are: 0.0001, 0.001, 0.01, 0.05 and 1.0. Setting 1.0 disables the test.\n", + "example" : [ + 0.01 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_overlap", + "alternatives" : [ + "-v" + ], + "description" : "Specify the minimum overlap size. The minimum overlap may be set to 1 when the statistical test is used. However, further restricting the minimum overlap size to a proper value may reduce false-positive assembles.\n", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--max_assembly_length", + "alternatives" : [ + "-m" + ], + "description" : "Specify the maximum possible length of the assembled sequences. Setting this value to 0 disables the restriction and assembled sequences may be arbitrary long.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_assembly_length", + "alternatives" : [ + "-n" + ], + "description" : "Specify the minimum possible length of the assembled sequences. Setting this value to 0 disables the restriction and assembled sequences may be arbitrary short.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_trim_length", + "alternatives" : [ + "-t" + ], + "description" : "Specify the minimum length of reads after trimming the low quality part (see option -q)\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--quality_threshold", + "alternatives" : [ + "-q" + ], + "description" : "Specify the quality threshold for trimming the low quality part of a read. If the quality scores of two consecutive bases are strictly less than the specified threshold, the rest of the read will be trimmed.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--max_uncalled_base", + "alternatives" : [ + "-u" + ], + "description" : "Specify the maximal proportion of uncalled bases in a read. Setting this value to 0 will cause PEAR to discard all reads containing uncalled bases. The other extreme setting is 1 which causes PEAR to process all reads independent on the number of uncalled bases.\n", + "example" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--test_method", + "alternatives" : [ + "-g" + ], + "description" : "Specify the type of statistical test. Two options are available. 1: Given the minimum allowed overlap, test using the highest OES. Note that due to its discrete nature, this test usually yields a lower p-value for the assembled read than the cut- off (specified by -p). For example, setting the cut-off to 0.05 using this test, the assembled reads might have an actual p-value of 0.02.\n2. Use the acceptance probability (m.a.p). This test methods computes the same probability as test method 1. However, it assumes that the minimal overlap is the observed overlap with the highest OES, instead of the one specified by -v. Therefore, this is not a valid statistical test and the 'p-value' is in fact the maximal probability for accepting the assembly. Nevertheless, we observed in practice that for the case the actual overlap sizes are relatively small, test 2 can correctly assemble more reads with only slightly higher false-positive rate.\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--emperical_freqs", + "alternatives" : [ + "-e" + ], + "description" : "Disable empirical base frequencies.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--score_method", + "alternatives" : [ + "-s" + ], + "description" : "Specify the scoring method. 1. OES with +1 for match and -1 for mismatch. 2: Assembly score (AS). Use +1 for match and -1 for mismatch multiplied by base quality scores. 3: Ignore quality scores and use +1 for a match and -1 for a mismatch.\n", + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--phred_base", + "alternatives" : [ + "-b" + ], + "description" : "Base PHRED quality score.\n", + "example" : [ + 33 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--cap", + "alternatives" : [ + "-c" + ], + "description" : "Specify the upper bound for the resulting quality score. If set to zero, capping is disabled.\n", + "example" : [ + 40 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--nbase", + "alternatives" : [ + "-z" + ], + "description" : "When merging a base-pair that consists of two non-equal bases out of which none is degenerate, set the merged base to N and use the highest quality score of the two bases\n", + "direction" : "input" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "PEAR is an ultrafast, memory-efficient and highly accurate pair-end read merger. It is fully parallelized and can run with as low as just a few kilobytes of memory.\n\nPEAR evaluates all possible paired-end read overlaps and without requiring the target fragment size as input. In addition, it implements a statistical test for minimizing false-positive results. Together with a highly optimized implementation, it can merge millions of paired end reads within a couple of minutes on a standard desktop computer.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "pair-end", + "read", + "merge" + ], + "license" : "CC-BY-NC-SA-3.0", + "references" : { + "doi" : [ + "10.1093/bioinformatics/btt593" + ] + }, + "links" : { + "repository" : "https://github.com/tseemann/PEAR", + "homepage" : "https://cme.h-its.org/exelixis/web/software/pear", + "documentation" : "https://cme.h-its.org/exelixis/web/software/pear/doc.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/pear:0.9.6--h9d449c0_10", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.1.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "version=$(pear -h | grep 'PEAR v' | sed 's/PEAR v//' | sed 's/ .*//') && \\\\\necho \\"pear: $version\\" > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/pear/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/pear", + "viash_version" : "0.9.0-RC6", + "git_commit" : "b84b29747d0635f2ac83ea63b496be9a9edb6724", + "git_remote" : "https://github.com/viash-hub/biobox" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.1.0", + "description" : "A collection of bioinformatics tools for working with sequence data.\n", + "viash_version" : "0.9.0-RC6", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.1.0'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_FORWARD_FASTQ+x} ]; then echo "${VIASH_PAR_FORWARD_FASTQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_forward_fastq='&'#" ; else echo "# par_forward_fastq="; fi ) +$( if [ ! -z ${VIASH_PAR_REVERSE_FASTQ+x} ]; then echo "${VIASH_PAR_REVERSE_FASTQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reverse_fastq='&'#" ; else echo "# par_reverse_fastq="; fi ) +$( if [ ! -z ${VIASH_PAR_ASSEMBLED+x} ]; then echo "${VIASH_PAR_ASSEMBLED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_assembled='&'#" ; else echo "# par_assembled="; fi ) +$( if [ ! -z ${VIASH_PAR_UNASSEMBLED_FORWARD+x} ]; then echo "${VIASH_PAR_UNASSEMBLED_FORWARD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_unassembled_forward='&'#" ; else echo "# par_unassembled_forward="; fi ) +$( if [ ! -z ${VIASH_PAR_UNASSEMBLED_REVERSE+x} ]; then echo "${VIASH_PAR_UNASSEMBLED_REVERSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_unassembled_reverse='&'#" ; else echo "# par_unassembled_reverse="; fi ) +$( if [ ! -z ${VIASH_PAR_DISCARDED+x} ]; then echo "${VIASH_PAR_DISCARDED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_discarded='&'#" ; else echo "# par_discarded="; fi ) +$( if [ ! -z ${VIASH_PAR_P_VALUE+x} ]; then echo "${VIASH_PAR_P_VALUE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_p_value='&'#" ; else echo "# par_p_value="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_OVERLAP+x} ]; then echo "${VIASH_PAR_MIN_OVERLAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_overlap='&'#" ; else echo "# par_min_overlap="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_ASSEMBLY_LENGTH+x} ]; then echo "${VIASH_PAR_MAX_ASSEMBLY_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_assembly_length='&'#" ; else echo "# par_max_assembly_length="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_ASSEMBLY_LENGTH+x} ]; then echo "${VIASH_PAR_MIN_ASSEMBLY_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_assembly_length='&'#" ; else echo "# par_min_assembly_length="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_TRIM_LENGTH+x} ]; then echo "${VIASH_PAR_MIN_TRIM_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_trim_length='&'#" ; else echo "# par_min_trim_length="; fi ) +$( if [ ! -z ${VIASH_PAR_QUALITY_THRESHOLD+x} ]; then echo "${VIASH_PAR_QUALITY_THRESHOLD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quality_threshold='&'#" ; else echo "# par_quality_threshold="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_UNCALLED_BASE+x} ]; then echo "${VIASH_PAR_MAX_UNCALLED_BASE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_uncalled_base='&'#" ; else echo "# par_max_uncalled_base="; fi ) +$( if [ ! -z ${VIASH_PAR_TEST_METHOD+x} ]; then echo "${VIASH_PAR_TEST_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_test_method='&'#" ; else echo "# par_test_method="; fi ) +$( if [ ! -z ${VIASH_PAR_EMPERICAL_FREQS+x} ]; then echo "${VIASH_PAR_EMPERICAL_FREQS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_emperical_freqs='&'#" ; else echo "# par_emperical_freqs="; fi ) +$( if [ ! -z ${VIASH_PAR_SCORE_METHOD+x} ]; then echo "${VIASH_PAR_SCORE_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_score_method='&'#" ; else echo "# par_score_method="; fi ) +$( if [ ! -z ${VIASH_PAR_PHRED_BASE+x} ]; then echo "${VIASH_PAR_PHRED_BASE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_phred_base='&'#" ; else echo "# par_phred_base="; fi ) +$( if [ ! -z ${VIASH_PAR_CAP+x} ]; then echo "${VIASH_PAR_CAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cap='&'#" ; else echo "# par_cap="; fi ) +$( if [ ! -z ${VIASH_PAR_NBASE+x} ]; then echo "${VIASH_PAR_NBASE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nbase='&'#" ; else echo "# par_nbase="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +[[ "\\$par_emperical_freqs" == "false" ]] && unset par_emperical_freqs +[[ "\\$par_nbase" == "false" ]] && unset par_nbase + +if [[ "\\${par_forward_fastq##*.}" == "gz" ]]; then + gunzip \\$par_forward_fastq + par_forward_fastq=\\${par_forward_fastq%.*} +fi +if [[ "\\${par_reverse_fastq##*.}" == "gz" ]]; then + gunzip \\$par_reverse_fastq + par_reverse_fastq=\\${par_reverse_fastq%.*} +fi + +output_dir=\\$(mktemp -d -p "\\$meta_temp_dir" "pear.XXXXXX") + +pear \\\\ + -f "\\$par_forward_fastq" \\\\ + -r "\\$par_reverse_fastq" \\\\ + -o "\\$output_dir" \\\\ + \\${par_p_value:+-p "\\${par_p_value}"} \\\\ + \\${par_min_overlap:+-v "\\${par_min_overlap}"} \\\\ + \\${par_max_assembly_length:+-m "\\${par_max_assembly_length}"} \\\\ + \\${par_min_assembly_length:+-n "\\${par_min_assembly_length}"} \\\\ + \\${par_min_trim_length:+-t "\\${par_min_trim_length}"} \\\\ + \\${par_quality_threshold:+-q "\\${par_quality_threshold}"} \\\\ + \\${par_max_uncalled_base:+-u "\\${par_max_uncalled_base}"} \\\\ + \\${par_test_method:+-g "\\${par_test_method}"} \\\\ + \\${par_score_method:+-s "\\${par_score_method}"} \\\\ + \\${par_phred_base:+-b "\\${par_phred_base}"} \\\\ + \\${meta_memory_mb:+--memory "\\${meta_memory_mb}M"} \\\\ + \\${par_cap:+-c "\\${par_cap}"} \\\\ + \\${meta_cpus:+-j "\\${meta_cpus}"} \\\\ + \\${par_emperical_freqs:+-e} \\\\ + \\${par_nbase:+-z} + + +if [[ "\\${par_assembled##*.}" == "gz" ]]; then + gzip -9 -c \\${output_dir}.assembled.fastq > \\${par_assembled} +else + mv \\${output_dir}.assembled.fastq \\${par_assembled} +fi + +if [[ "\\${par_unassembled_forward##*.}" == "gz" ]]; then + gzip -9 -c \\${output_dir}.unassembled.forward.fastq > \\${par_unassembled_forward} +else + mv \\${output_dir}.unassembled.forward.fastq \\${par_unassembled_forward} +fi + +if [[ "\\${par_unassembled_reverse##*.}" == "gz" ]]; then + gzip -9 -c \\${output_dir}.unassembled.reverse.fastq > \\${par_unassembled_reverse} +else + mv \\${output_dir}.unassembled.reverse.fastq \\${par_unassembled_reverse} +fi + +if [[ "\\${par_discarded##*.}" == "gz" ]]; then + gzip -9 -c \\${output_dir}.discarded.fastq > \\${par_discarded} +else + mv \\${output_dir}.discarded.fastq \\${par_discarded} +fi +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def viash_par_contents = "(viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName})" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = new nextflow.script.ScriptParser(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/pear", + "tag" : "v0.1.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/pear/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/pear/nextflow.config new file mode 100644 index 0000000..b4a0ff1 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/pear/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'pear' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.1.0' + description = 'PEAR is an ultrafast, memory-efficient and highly accurate pair-end read merger. It is fully parallelized and can run with as low as just a few kilobytes of memory.\n\nPEAR evaluates all possible paired-end read overlaps and without requiring the target fragment size as input. In addition, it implements a statistical test for minimizing false-positive results. Together with a highly optimized implementation, it can merge millions of paired end reads within a couple of minutes on a standard desktop computer.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/pear/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/pear/nextflow_schema.json new file mode 100644 index 0000000..77dee5c --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/pear/nextflow_schema.json @@ -0,0 +1,284 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "pear", +"description": "PEAR is an ultrafast, memory-efficient and highly accurate pair-end read merger. It is fully parallelized and can run with as low as just a few kilobytes of memory.\n\nPEAR evaluates all possible paired-end read overlaps and without requiring the target fragment size as input. In addition, it implements a statistical test for minimizing false-positive results. Together with a highly optimized implementation, it can merge millions of paired end reads within a couple of minutes on a standard desktop computer.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "forward_fastq": { + "type": + "string", + "description": "Type: `file`, required, example: `forward.fastq`. Forward paired-end FASTQ file", + "help_text": "Type: `file`, required, example: `forward.fastq`. Forward paired-end FASTQ file" + + } + + + , + "reverse_fastq": { + "type": + "string", + "description": "Type: `file`, required, example: `reverse.fastq`. Reverse paired-end FASTQ file", + "help_text": "Type: `file`, required, example: `reverse.fastq`. Reverse paired-end FASTQ file" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "assembled": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.assembled.assembled`. The output file containing assembled reads", + "help_text": "Type: `file`, required, default: `$id.$key.assembled.assembled`. The output file containing assembled reads. Can be compressed with gzip." + , + "default": "$id.$key.assembled.assembled" + } + + + , + "unassembled_forward": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.unassembled_forward.unassembled_forward`. The output file containing forward reads that could not be assembled", + "help_text": "Type: `file`, required, default: `$id.$key.unassembled_forward.unassembled_forward`. The output file containing forward reads that could not be assembled. Can be compressed with gzip." + , + "default": "$id.$key.unassembled_forward.unassembled_forward" + } + + + , + "unassembled_reverse": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.unassembled_reverse.unassembled_reverse`. The output file containing reverse reads that could not be assembled", + "help_text": "Type: `file`, required, default: `$id.$key.unassembled_reverse.unassembled_reverse`. The output file containing reverse reads that could not be assembled. Can be compressed with gzip." + , + "default": "$id.$key.unassembled_reverse.unassembled_reverse" + } + + + , + "discarded": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.discarded.discarded`. The output file containing reads that were discarded due to too low quality or too many uncalled bases", + "help_text": "Type: `file`, required, default: `$id.$key.discarded.discarded`. The output file containing reads that were discarded due to too low quality or too many uncalled bases. Can be compressed with gzip." + , + "default": "$id.$key.discarded.discarded" + } + + +} +}, + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "p_value": { + "type": + "number", + "description": "Type: `double`, example: `0.01`. Specify a p-value for the statistical test", + "help_text": "Type: `double`, example: `0.01`. Specify a p-value for the statistical test. If the computed p-value of a possible assembly exceeds the specified p-value then paired-end read will not be assembled. Valid options are: 0.0001, 0.001, 0.01, 0.05 and 1.0. Setting 1.0 disables the test.\n" + + } + + + , + "min_overlap": { + "type": + "integer", + "description": "Type: `integer`, example: `10`. Specify the minimum overlap size", + "help_text": "Type: `integer`, example: `10`. Specify the minimum overlap size. The minimum overlap may be set to 1 when the statistical test is used. However, further restricting the minimum overlap size to a proper value may reduce false-positive assembles.\n" + + } + + + , + "max_assembly_length": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Specify the maximum possible length of the assembled sequences", + "help_text": "Type: `integer`, example: `0`. Specify the maximum possible length of the assembled sequences. Setting this value to 0 disables the restriction and assembled sequences may be arbitrary long.\n" + + } + + + , + "min_assembly_length": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Specify the minimum possible length of the assembled sequences", + "help_text": "Type: `integer`, example: `0`. Specify the minimum possible length of the assembled sequences. Setting this value to 0 disables the restriction and assembled sequences may be arbitrary short.\n" + + } + + + , + "min_trim_length": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Specify the minimum length of reads after trimming the low quality part (see option -q)\n", + "help_text": "Type: `integer`, example: `1`. Specify the minimum length of reads after trimming the low quality part (see option -q)\n" + + } + + + , + "quality_threshold": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Specify the quality threshold for trimming the low quality part of a read", + "help_text": "Type: `integer`, example: `0`. Specify the quality threshold for trimming the low quality part of a read. If the quality scores of two consecutive bases are strictly less than the specified threshold, the rest of the read will be trimmed.\n" + + } + + + , + "max_uncalled_base": { + "type": + "number", + "description": "Type: `double`, example: `1.0`. Specify the maximal proportion of uncalled bases in a read", + "help_text": "Type: `double`, example: `1.0`. Specify the maximal proportion of uncalled bases in a read. Setting this value to 0 will cause PEAR to discard all reads containing uncalled bases. The other extreme setting is 1 which causes PEAR to process all reads independent on the number of uncalled bases.\n" + + } + + + , + "test_method": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Specify the type of statistical test", + "help_text": "Type: `integer`, example: `1`. Specify the type of statistical test. Two options are available. 1: Given the minimum allowed overlap, test using the highest OES. Note that due to its discrete nature, this test usually yields a lower p-value for the assembled read than the cut- off (specified by -p). For example, setting the cut-off to 0.05 using this test, the assembled reads might have an actual p-value of 0.02.\n2. Use the acceptance probability (m.a.p). This test methods computes the same probability as test method 1. However, it assumes that the minimal overlap is the observed overlap with the highest OES, instead of the one specified by -v. Therefore, this is not a valid statistical test and the \u0027p-value\u0027 is in fact the maximal probability for accepting the assembly. Nevertheless, we observed in practice that for the case the actual overlap sizes are relatively small, test 2 can correctly assemble more reads with only slightly higher false-positive rate.\n" + + } + + + , + "emperical_freqs": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Disable empirical base frequencies", + "help_text": "Type: `boolean_true`, default: `false`. Disable empirical base frequencies.\n" + , + "default": "False" + } + + + , + "score_method": { + "type": + "integer", + "description": "Type: `integer`, example: `2`. Specify the scoring method", + "help_text": "Type: `integer`, example: `2`. Specify the scoring method. 1. OES with +1 for match and -1 for mismatch. 2: Assembly score (AS). Use +1 for match and -1 for mismatch multiplied by base quality scores. 3: Ignore quality scores and use +1 for a match and -1 for a mismatch.\n" + + } + + + , + "phred_base": { + "type": + "integer", + "description": "Type: `integer`, example: `33`. Base PHRED quality score", + "help_text": "Type: `integer`, example: `33`. Base PHRED quality score.\n" + + } + + + , + "cap": { + "type": + "integer", + "description": "Type: `integer`, example: `40`. Specify the upper bound for the resulting quality score", + "help_text": "Type: `integer`, example: `40`. Specify the upper bound for the resulting quality score. If set to zero, capping is disabled.\n" + + } + + + , + "nbase": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. When merging a base-pair that consists of two non-equal bases out of which none is degenerate, set the merged base to N and use the highest quality score of the two bases\n", + "help_text": "Type: `boolean_true`, default: `false`. When merging a base-pair that consists of two non-equal bases out of which none is degenerate, set the merged base to N and use the highest quality score of the two bases\n" + , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats/.config.vsh.yaml new file mode 100644 index 0000000..c9523df --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats/.config.vsh.yaml @@ -0,0 +1,406 @@ +name: "samtools_stats" +namespace: "samtools" +version: "v0.1.0" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Input file.\n" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bai" + description: "Index file.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fasta" + description: "Reference file the CRAM was created with.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--coverage" + alternatives: + - "-c" + description: "Coverage distribution min,max,step [1,1000,1].\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: "," + - type: "boolean_true" + name: "--remove_dups" + alternatives: + - "-d" + description: "Exclude from statistics reads marked as duplicates.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--customized_index_file" + alternatives: + - "-X" + description: "Use a customized index file.\n" + info: null + direction: "input" + - type: "string" + name: "--required_flag" + alternatives: + - "-f" + description: "Required flag, 0 for unset. See also `samtools flags`.\n" + info: null + default: + - "0" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--filtering_flag" + alternatives: + - "-F" + description: "Filtering flag, 0 for unset. See also `samtools flags`.\n" + info: null + default: + - "0" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--GC_depth" + description: "The size of GC-depth bins (decreasing bin size increases memory\ + \ requirement).\n" + info: null + default: + - 20000.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--insert_size" + alternatives: + - "-i" + description: "Maximum insert size.\n" + info: null + default: + - 8000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--id" + alternatives: + - "-I" + description: "Include only listed read group or sample name.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--read_length" + alternatives: + - "-l" + description: "Include in the statistics only reads with the given read length.\n" + info: null + default: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--most_inserts" + alternatives: + - "-m" + description: "Report only the main part of inserts.\n" + info: null + default: + - 0.99 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--split_prefix" + alternatives: + - "-P" + description: "Path or string prefix for filepaths output by --split (default is\ + \ input filename).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--trim_quality" + alternatives: + - "-q" + description: "The BWA trimming parameter.\n" + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--ref_seq" + alternatives: + - "-r" + description: "Reference sequence (required for GC-depth and mismatches-per-cycle\ + \ calculation).\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--split" + alternatives: + - "-S" + description: "Also write statistics to separate files split by tagged field.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--target_regions" + alternatives: + - "-t" + description: "Do stats in these regions only. Tab-delimited file chr,from,to,\ + \ 1-based, inclusive.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--sparse" + alternatives: + - "-x" + description: "Suppress outputting IS rows where there are no insertions.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--remove_overlaps" + alternatives: + - "-p" + description: "Remove overlaps of paired-end reads from coverage and base count\ + \ computations.\n" + info: null + direction: "input" + - type: "integer" + name: "--cov_threshold" + alternatives: + - "-g" + description: "Only bases with coverage above this value will be included in the\ + \ target percentage computation.\n" + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--input_fmt_option" + description: "Specify a single input file format option in the form of OPTION\ + \ or OPTION=VALUE.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--reference" + description: "Reference sequence FASTA FILE.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output file.\n" + info: null + default: + - "out.txt" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Reports alignment summary statistics for a BAM file." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +requirements: + commands: + - "ps" +keywords: +- "statistics" +- "counts" +- "bam" +- "sam" +- "cram" +license: "MIT/Expat" +references: + doi: + - "10.1093/bioinformatics/btp352" + - "10.1093/gigascience/giab008" +links: + repository: "https://github.com/samtools/samtools" + homepage: "https://www.htslib.org/" + documentation: "https://www.htslib.org/doc/samtools-stats.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" + target_registry: "images.viash-hub.com" + target_tag: "v0.1.0" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\ + \ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/samtools/samtools_stats/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/samtools/samtools_stats" + executable: "target/nextflow/samtools/samtools_stats/main.nf" + viash_version: "0.9.0-RC6" + git_commit: "b84b29747d0635f2ac83ea63b496be9a9edb6724" + git_remote: "https://github.com/viash-hub/biobox" +package_config: + name: "biobox" + version: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null + viash_version: "0.9.0-RC6" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.1.0'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats/main.nf b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats/main.nf new file mode 100644 index 0000000..167148f --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats/main.nf @@ -0,0 +1,3836 @@ +// samtools_stats v0.1.0 +// +// This wrapper script is auto-generated by viash 0.9.0-RC6 and is thus a +// derivative work thereof. This software comes with ABSOLUTELY NO WARRANTY from +// Data Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value instanceof String) { + try { + value = value.toInteger() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigInteger) { + value = value.intValue() + } + expectedClass = value instanceof Integer ? null : "Integer" + } else if (par.type == "long") { + // cast to long if need be + if (value instanceof String) { + try { + value = value.toLong() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof Integer) { + value = value.toLong() + } + expectedClass = value instanceof Long ? null : "Long" + } else if (par.type == "double") { + // cast to double if need be + if (value instanceof String) { + try { + value = value.toDouble() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigDecimal) { + value = value.doubleValue() + } + if (value instanceof Float) { + value = value.toDouble() + } + expectedClass = value instanceof Double ? null : "Double" + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value instanceof String) { + def valueLower = value.toLowerCase() + if (valueLower == "true") { + value = true + } else if (valueLower == "false") { + value = false + } + } + expectedClass = value instanceof Boolean ? null : "Boolean" + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required) { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _processOutputValues(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(";") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey,newKey:oldKey'" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{[yamlFile] + outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ +mkdir -p "\$(dirname '${yamlFile}')" +echo "Storing state as yaml" +echo '${yamlBlob}' > '${yamlFile}' +echo "Copying output files to destination folder" +${copyCommands.join("\n ")} +""" +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (key, value) are the tuples that will be saved to the state.yaml file + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = val instanceof File ? val.toPath() : val + [value: value_, inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + def chModifiedFiltered = workflowArgs.filter ? + chModified | filter{workflowArgs.filter(it)} : + chModified + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModifiedFiltered.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModifiedFiltered + chPassthrough = Channel.empty() + } + + def chArgs = workflowArgs.fromState ? + chRun | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRun | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutput = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + // check output tuple + | map { id_, output_ -> + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _processOutputValues(output_, meta.config, id_, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { + output_ = output_.values()[0] + } + + [join_id, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chInitialOutput, chModifiedFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublish = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublish, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + + // remove join_id and meta + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "samtools_stats", + "namespace" : "samtools", + "version" : "v0.1.0", + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input file.\n", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bai", + "description" : "Index file.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fasta", + "description" : "Reference file the CRAM was created with.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--coverage", + "alternatives" : [ + "-c" + ], + "description" : "Coverage distribution min,max,step [1,1000,1].\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : "," + }, + { + "type" : "boolean_true", + "name" : "--remove_dups", + "alternatives" : [ + "-d" + ], + "description" : "Exclude from statistics reads marked as duplicates.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--customized_index_file", + "alternatives" : [ + "-X" + ], + "description" : "Use a customized index file.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--required_flag", + "alternatives" : [ + "-f" + ], + "description" : "Required flag, 0 for unset. See also `samtools flags`.\n", + "default" : [ + "0" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--filtering_flag", + "alternatives" : [ + "-F" + ], + "description" : "Filtering flag, 0 for unset. See also `samtools flags`.\n", + "default" : [ + "0" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--GC_depth", + "description" : "The size of GC-depth bins (decreasing bin size increases memory requirement).\n", + "default" : [ + 20000.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--insert_size", + "alternatives" : [ + "-i" + ], + "description" : "Maximum insert size.\n", + "default" : [ + 8000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--id", + "alternatives" : [ + "-I" + ], + "description" : "Include only listed read group or sample name.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--read_length", + "alternatives" : [ + "-l" + ], + "description" : "Include in the statistics only reads with the given read length.\n", + "default" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--most_inserts", + "alternatives" : [ + "-m" + ], + "description" : "Report only the main part of inserts.\n", + "default" : [ + 0.99 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--split_prefix", + "alternatives" : [ + "-P" + ], + "description" : "Path or string prefix for filepaths output by --split (default is input filename).\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--trim_quality", + "alternatives" : [ + "-q" + ], + "description" : "The BWA trimming parameter.\n", + "default" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--ref_seq", + "alternatives" : [ + "-r" + ], + "description" : "Reference sequence (required for GC-depth and mismatches-per-cycle calculation).\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--split", + "alternatives" : [ + "-S" + ], + "description" : "Also write statistics to separate files split by tagged field.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--target_regions", + "alternatives" : [ + "-t" + ], + "description" : "Do stats in these regions only. Tab-delimited file chr,from,to, 1-based, inclusive.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--sparse", + "alternatives" : [ + "-x" + ], + "description" : "Suppress outputting IS rows where there are no insertions.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--remove_overlaps", + "alternatives" : [ + "-p" + ], + "description" : "Remove overlaps of paired-end reads from coverage and base count computations.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--cov_threshold", + "alternatives" : [ + "-g" + ], + "description" : "Only bases with coverage above this value will be included in the target percentage computation.\n", + "default" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--input_fmt_option", + "description" : "Specify a single input file format option in the form of OPTION or OPTION=VALUE.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--reference", + "description" : "Reference sequence FASTA FILE.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output file.\n", + "default" : [ + "out.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Reports alignment summary statistics for a BAM file.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "statistics", + "counts", + "bam", + "sam", + "cram" + ], + "license" : "MIT/Expat", + "references" : { + "doi" : [ + "10.1093/bioinformatics/btp352", + "10.1093/gigascience/giab008" + ] + }, + "links" : { + "repository" : "https://github.com/samtools/samtools", + "homepage" : "https://www.htslib.org/", + "documentation" : "https://www.htslib.org/doc/samtools-stats.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.1.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\\\nsed 's#Using ##;s# \\\\([0-9\\\\.]*\\\\)$#: \\\\1#' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/samtools/samtools_stats/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/samtools/samtools_stats", + "viash_version" : "0.9.0-RC6", + "git_commit" : "b84b29747d0635f2ac83ea63b496be9a9edb6724", + "git_remote" : "https://github.com/viash-hub/biobox" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.1.0", + "description" : "A collection of bioinformatics tools for working with sequence data.\n", + "viash_version" : "0.9.0-RC6", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.1.0'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTA+x} ]; then echo "${VIASH_PAR_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fasta='&'#" ; else echo "# par_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_COVERAGE+x} ]; then echo "${VIASH_PAR_COVERAGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_coverage='&'#" ; else echo "# par_coverage="; fi ) +$( if [ ! -z ${VIASH_PAR_REMOVE_DUPS+x} ]; then echo "${VIASH_PAR_REMOVE_DUPS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_remove_dups='&'#" ; else echo "# par_remove_dups="; fi ) +$( if [ ! -z ${VIASH_PAR_CUSTOMIZED_INDEX_FILE+x} ]; then echo "${VIASH_PAR_CUSTOMIZED_INDEX_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_customized_index_file='&'#" ; else echo "# par_customized_index_file="; fi ) +$( if [ ! -z ${VIASH_PAR_REQUIRED_FLAG+x} ]; then echo "${VIASH_PAR_REQUIRED_FLAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_required_flag='&'#" ; else echo "# par_required_flag="; fi ) +$( if [ ! -z ${VIASH_PAR_FILTERING_FLAG+x} ]; then echo "${VIASH_PAR_FILTERING_FLAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_filtering_flag='&'#" ; else echo "# par_filtering_flag="; fi ) +$( if [ ! -z ${VIASH_PAR_GC_DEPTH+x} ]; then echo "${VIASH_PAR_GC_DEPTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_GC_depth='&'#" ; else echo "# par_GC_depth="; fi ) +$( if [ ! -z ${VIASH_PAR_INSERT_SIZE+x} ]; then echo "${VIASH_PAR_INSERT_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_insert_size='&'#" ; else echo "# par_insert_size="; fi ) +$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi ) +$( if [ ! -z ${VIASH_PAR_READ_LENGTH+x} ]; then echo "${VIASH_PAR_READ_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_read_length='&'#" ; else echo "# par_read_length="; fi ) +$( if [ ! -z ${VIASH_PAR_MOST_INSERTS+x} ]; then echo "${VIASH_PAR_MOST_INSERTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_most_inserts='&'#" ; else echo "# par_most_inserts="; fi ) +$( if [ ! -z ${VIASH_PAR_SPLIT_PREFIX+x} ]; then echo "${VIASH_PAR_SPLIT_PREFIX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_split_prefix='&'#" ; else echo "# par_split_prefix="; fi ) +$( if [ ! -z ${VIASH_PAR_TRIM_QUALITY+x} ]; then echo "${VIASH_PAR_TRIM_QUALITY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trim_quality='&'#" ; else echo "# par_trim_quality="; fi ) +$( if [ ! -z ${VIASH_PAR_REF_SEQ+x} ]; then echo "${VIASH_PAR_REF_SEQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ref_seq='&'#" ; else echo "# par_ref_seq="; fi ) +$( if [ ! -z ${VIASH_PAR_SPLIT+x} ]; then echo "${VIASH_PAR_SPLIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_split='&'#" ; else echo "# par_split="; fi ) +$( if [ ! -z ${VIASH_PAR_TARGET_REGIONS+x} ]; then echo "${VIASH_PAR_TARGET_REGIONS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_target_regions='&'#" ; else echo "# par_target_regions="; fi ) +$( if [ ! -z ${VIASH_PAR_SPARSE+x} ]; then echo "${VIASH_PAR_SPARSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sparse='&'#" ; else echo "# par_sparse="; fi ) +$( if [ ! -z ${VIASH_PAR_REMOVE_OVERLAPS+x} ]; then echo "${VIASH_PAR_REMOVE_OVERLAPS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_remove_overlaps='&'#" ; else echo "# par_remove_overlaps="; fi ) +$( if [ ! -z ${VIASH_PAR_COV_THRESHOLD+x} ]; then echo "${VIASH_PAR_COV_THRESHOLD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cov_threshold='&'#" ; else echo "# par_cov_threshold="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_FMT_OPTION+x} ]; then echo "${VIASH_PAR_INPUT_FMT_OPTION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_fmt_option='&'#" ; else echo "# par_input_fmt_option="; fi ) +$( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "${VIASH_PAR_REFERENCE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reference='&'#" ; else echo "# par_reference="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -e + +[[ "\\$par_remove_dups" == "false" ]] && unset par_remove_dups +[[ "\\$par_customized_index_file" == "false" ]] && unset par_customized_index_file +[[ "\\$par_sparse" == "false" ]] && unset par_sparse +[[ "\\$par_remove_overlaps" == "false" ]] && unset par_remove_overlaps + +samtools stats \\\\ + \\${par_coverage:+-c "\\$par_coverage"} \\\\ + \\${par_remove_dups:+-d} \\\\ + \\${par_required_flag:+-f "\\$par_required_flag"} \\\\ + \\${par_filtering_flag:+-F "\\$par_filtering_flag"} \\\\ + \\${par_GC_depth:+--GC-depth "\\$par_GC_depth"} \\\\ + \\${par_insert_size:+-i "\\$par_insert_size"} \\\\ + \\${par_id:+-I "\\$par_id"} \\\\ + \\${par_read_length:+-l "\\$par_read_length"} \\\\ + \\${par_most_inserts:+-m "\\$par_most_inserts"} \\\\ + \\${par_split_prefix:+-P "\\$par_split_prefix"} \\\\ + \\${par_trim_quality:+-q "\\$par_trim_quality"} \\\\ + \\${par_ref_seq:+-r "\\$par_ref_seq"} \\\\ + \\${par_split:+-S "\\$par_split"} \\\\ + \\${par_target_regions:+-t "\\$par_target_regions"} \\\\ + \\${par_sparse:+-x} \\\\ + \\${par_remove_overlaps:+-p} \\\\ + \\${par_cov_threshold:+-g "\\$par_cov_threshold"} \\\\ + \\${par_input_fmt_option:+-O "\\$par_input_fmt_option"} \\\\ + \\${par_reference:+-R "\\$par_reference"} \\\\ + "\\$par_input" \\\\ + > "\\$par_output" + +exit 0 +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def viash_par_contents = "(viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName})" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = new nextflow.script.ScriptParser(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/samtools/samtools_stats", + "tag" : "v0.1.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats/nextflow.config new file mode 100644 index 0000000..d746263 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'samtools/samtools_stats' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.1.0' + description = 'Reports alignment summary statistics for a BAM file.' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats/nextflow_schema.json new file mode 100644 index 0000000..ceb3b49 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats/nextflow_schema.json @@ -0,0 +1,327 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "samtools_stats", +"description": "Reports alignment summary statistics for a BAM file.", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input file", + "help_text": "Type: `file`, required. Input file.\n" + + } + + + , + "bai": { + "type": + "string", + "description": "Type: `file`. Index file", + "help_text": "Type: `file`. Index file.\n" + + } + + + , + "fasta": { + "type": + "string", + "description": "Type: `file`. Reference file the CRAM was created with", + "help_text": "Type: `file`. Reference file the CRAM was created with.\n" + + } + + + , + "coverage": { + "type": + "string", + "description": "Type: List of `integer`, multiple_sep: `\",\"`. Coverage distribution min,max,step [1,1000,1]", + "help_text": "Type: List of `integer`, multiple_sep: `\",\"`. Coverage distribution min,max,step [1,1000,1].\n" + + } + + + , + "remove_dups": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Exclude from statistics reads marked as duplicates", + "help_text": "Type: `boolean_true`, default: `false`. Exclude from statistics reads marked as duplicates.\n" + , + "default": "False" + } + + + , + "customized_index_file": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use a customized index file", + "help_text": "Type: `boolean_true`, default: `false`. Use a customized index file.\n" + , + "default": "False" + } + + + , + "required_flag": { + "type": + "string", + "description": "Type: `string`, default: `0`. Required flag, 0 for unset", + "help_text": "Type: `string`, default: `0`. Required flag, 0 for unset. See also `samtools flags`.\n" + , + "default": "0" + } + + + , + "filtering_flag": { + "type": + "string", + "description": "Type: `string`, default: `0`. Filtering flag, 0 for unset", + "help_text": "Type: `string`, default: `0`. Filtering flag, 0 for unset. See also `samtools flags`.\n" + , + "default": "0" + } + + + , + "GC_depth": { + "type": + "number", + "description": "Type: `double`, default: `20000.0`. The size of GC-depth bins (decreasing bin size increases memory requirement)", + "help_text": "Type: `double`, default: `20000.0`. The size of GC-depth bins (decreasing bin size increases memory requirement).\n" + , + "default": "20000.0" + } + + + , + "insert_size": { + "type": + "integer", + "description": "Type: `integer`, default: `8000`. Maximum insert size", + "help_text": "Type: `integer`, default: `8000`. Maximum insert size.\n" + , + "default": "8000" + } + + + , + "id": { + "type": + "string", + "description": "Type: `string`. Include only listed read group or sample name", + "help_text": "Type: `string`. Include only listed read group or sample name.\n" + + } + + + , + "read_length": { + "type": + "integer", + "description": "Type: `integer`, default: `-1`. Include in the statistics only reads with the given read length", + "help_text": "Type: `integer`, default: `-1`. Include in the statistics only reads with the given read length.\n" + , + "default": "-1" + } + + + , + "most_inserts": { + "type": + "number", + "description": "Type: `double`, default: `0.99`. Report only the main part of inserts", + "help_text": "Type: `double`, default: `0.99`. Report only the main part of inserts.\n" + , + "default": "0.99" + } + + + , + "split_prefix": { + "type": + "string", + "description": "Type: `string`. Path or string prefix for filepaths output by --split (default is input filename)", + "help_text": "Type: `string`. Path or string prefix for filepaths output by --split (default is input filename).\n" + + } + + + , + "trim_quality": { + "type": + "integer", + "description": "Type: `integer`, default: `0`. The BWA trimming parameter", + "help_text": "Type: `integer`, default: `0`. The BWA trimming parameter.\n" + , + "default": "0" + } + + + , + "ref_seq": { + "type": + "string", + "description": "Type: `file`. Reference sequence (required for GC-depth and mismatches-per-cycle calculation)", + "help_text": "Type: `file`. Reference sequence (required for GC-depth and mismatches-per-cycle calculation).\n" + + } + + + , + "split": { + "type": + "string", + "description": "Type: `string`. Also write statistics to separate files split by tagged field", + "help_text": "Type: `string`. Also write statistics to separate files split by tagged field.\n" + + } + + + , + "target_regions": { + "type": + "string", + "description": "Type: `file`. Do stats in these regions only", + "help_text": "Type: `file`. Do stats in these regions only. Tab-delimited file chr,from,to, 1-based, inclusive.\n" + + } + + + , + "sparse": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Suppress outputting IS rows where there are no insertions", + "help_text": "Type: `boolean_true`, default: `false`. Suppress outputting IS rows where there are no insertions.\n" + , + "default": "False" + } + + + , + "remove_overlaps": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Remove overlaps of paired-end reads from coverage and base count computations", + "help_text": "Type: `boolean_true`, default: `false`. Remove overlaps of paired-end reads from coverage and base count computations.\n" + , + "default": "False" + } + + + , + "cov_threshold": { + "type": + "integer", + "description": "Type: `integer`, default: `0`. Only bases with coverage above this value will be included in the target percentage computation", + "help_text": "Type: `integer`, default: `0`. Only bases with coverage above this value will be included in the target percentage computation.\n" + , + "default": "0" + } + + + , + "input_fmt_option": { + "type": + "string", + "description": "Type: `string`. Specify a single input file format option in the form of OPTION or OPTION=VALUE", + "help_text": "Type: `string`. Specify a single input file format option in the form of OPTION or OPTION=VALUE.\n" + + } + + + , + "reference": { + "type": + "string", + "description": "Type: `file`. Reference sequence FASTA FILE", + "help_text": "Type: `file`. Reference sequence FASTA FILE.\n" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.txt`. Output file", + "help_text": "Type: `file`, required, default: `$id.$key.output.txt`. Output file.\n" + , + "default": "$id.$key.output.txt" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/star/star_align_reads/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/star/star_align_reads/.config.vsh.yaml new file mode 100644 index 0000000..41253ce --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/star/star_align_reads/.config.vsh.yaml @@ -0,0 +1,2121 @@ +name: "star_align_reads" +namespace: "star" +version: "v0.1.0" +argument_groups: +- name: "Run Parameters" + arguments: + - type: "integer" + name: "--runRNGseed" + description: "random number generator seed." + info: null + example: + - 777 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Genome Parameters" + arguments: + - type: "file" + name: "--genomeDir" + description: "path to the directory where genome files are stored (for --runMode\ + \ alignReads) or will be generated (for --runMode generateGenome)" + info: null + example: + - "./GenomeDir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--genomeLoad" + description: "mode of shared memory usage for the genome files. Only used with\ + \ --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and\ + \ keep it in memory after run\n- LoadAndRemove ... load genome into shared\ + \ but remove it after run\n- LoadAndExit ... load genome into shared memory\ + \ and exit, keeping the genome in memory for future runs\n- Remove \ + \ ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory\ + \ ... do not use shared memory, each job will have its own private copy of\ + \ the genome" + info: null + example: + - "NoSharedMemory" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genomeFastaFiles" + description: "path(s) to the fasta files with the genome sequences, separated\ + \ by spaces. These files should be plain text FASTA files, they *cannot* be\ + \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\ + \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\ + \ sequences to the genome (e.g. spike-ins)." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--genomeFileSizes" + description: "genome files exact sizes in bytes. Typically, this should not be\ + \ defined by the user." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--genomeTransformOutput" + description: "which output to transform back to original genome\n\n- SAM ...\ + \ SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n- Quant \ + \ ... quantifications (from --quantMode option)\n- None ... no transformation\ + \ of the output" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--genomeChrSetMitochondrial" + description: "names of the mitochondrial chromosomes. Presently only used for\ + \ STARsolo statistics output/" + info: null + example: + - "chrM" + - "M" + - "MT" + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Splice Junctions Database" + arguments: + - type: "string" + name: "--sjdbFileChrStartEnd" + description: "path to the files with genomic coordinates (chr start \ + \ end strand) for the splice junction introns. Multiple files can be supplied\ + \ and will be concatenated." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--sjdbGTFfile" + description: "path to the GTF file with annotations" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdbGTFchrPrefix" + description: "prefix for chromosome names in a GTF file (e.g. 'chr' for using\ + \ ENSMEBL annotations with UCSC genomes)" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdbGTFfeatureExon" + description: "feature type in GTF file to be used as exons for building transcripts" + info: null + example: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdbGTFtagExonParentTranscript" + description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ + \ works for GTF files)" + info: null + example: + - "transcript_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdbGTFtagExonParentGene" + description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ + \ for GTF files)" + info: null + example: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdbGTFtagExonParentGeneName" + description: "GTF attribute name for parent gene name" + info: null + example: + - "gene_name" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--sjdbGTFtagExonParentGeneType" + description: "GTF attribute name for parent gene type" + info: null + example: + - "gene_type" + - "gene_biotype" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--sjdbOverhang" + description: "length of the donor/acceptor sequence on each side of the junctions,\ + \ ideally = (mate_length - 1)" + info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--sjdbScore" + description: "extra alignment score for alignments that cross database junctions" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--sjdbInsertSave" + description: "which files to save when sjdb junctions are inserted on the fly\ + \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\ + - All ... all files including big Genome, SA and SAindex - this will create\ + \ a complete genome directory" + info: null + example: + - "Basic" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Variation parameters" + arguments: + - type: "string" + name: "--varVCFfile" + description: "path to the VCF file that contains variation data. The 10th column\ + \ should contain the genotype information, e.g. 0/1" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Read Parameters" + arguments: + - type: "string" + name: "--readFilesType" + description: "format of input read files\n\n- Fastx ... FASTA or FASTQ\n\ + - SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand\ + \ samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use\ + \ --readFilesCommand samtools view" + info: null + example: + - "Fastx" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--readFilesSAMattrKeep" + description: "for --readFilesType SAM SE/PE, which SAM tags to keep in the output\ + \ BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n-\ + \ None ... do not keep any tags" + info: null + example: + - "All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--readFilesManifest" + description: "path to the \"manifest\" file with the names of read files. The\ + \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\ + \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end reads:\ + \ read1_file_name $tab$ - $tab$ read_group_line.\nSpaces, but\ + \ not tabs are allowed in file names.\nIf read_group_line does not start with\ + \ ID:, it can only contain one ID field, and ID: will be added to it.\nIf read_group_line\ + \ starts with ID:, it can contain several fields separated by $tab$, and all\ + \ fields will be be copied verbatim into SAM @RG header line." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--readFilesPrefix" + description: "prefix for the read files names, i.e. it will be added in front\ + \ of the strings in --readFilesIn" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--readFilesCommand" + description: "command line to execute for each of the input file. This command\ + \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\ + \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--readMapNumber" + description: "number of reads to map from the beginning of the file\n\n-1: map\ + \ all reads" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--readMatesLengthsIn" + description: "Equal/NotEqual - lengths of names,sequences,qualities for both mates\ + \ are the same / not the same. NotEqual is safe in all situations." + info: null + example: + - "NotEqual" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--readNameSeparator" + description: "character(s) separating the part of the read names that will be\ + \ trimmed in output (read name after space is always trimmed)" + info: null + example: + - "/" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--readQualityScoreBase" + description: "number to be subtracted from the ASCII code to get Phred quality\ + \ score" + info: null + example: + - 33 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Read Clipping" + arguments: + - type: "string" + name: "--clipAdapterType" + description: "adapter clipping type\n\n- Hamming ... adapter clipping based on\ + \ Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\ + - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\ + \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None ...\ + \ no adapter clipping, all other clip* parameters are disregarded" + info: null + example: + - "Hamming" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--clip3pNbases" + description: "number(s) of bases to clip from 3p of each mate. If one value is\ + \ given, it will be assumed the same for both mates." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--clip3pAdapterSeq" + description: "adapter sequences to clip from 3p of each mate. If one value is\ + \ given, it will be assumed the same for both mates.\n\n- polyA ... polyA sequence\ + \ with the length equal to read length" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "double" + name: "--clip3pAdapterMMp" + description: "max proportion of mismatches for 3p adapter clipping for each mate.\ + \ If one value is given, it will be assumed the same for both mates." + info: null + example: + - 0.1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--clip3pAfterAdapterNbases" + description: "number of bases to clip from 3p of each mate after the adapter clipping.\ + \ If one value is given, it will be assumed the same for both mates." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--clip5pNbases" + description: "number(s) of bases to clip from 5p of each mate. If one value is\ + \ given, it will be assumed the same for both mates." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Limits" + arguments: + - type: "long" + name: "--limitGenomeGenerateRAM" + description: "maximum available RAM (bytes) for genome generation" + info: null + example: + - 31000000000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "long" + name: "--limitIObufferSize" + description: "max available buffers size (bytes) for input/output, per thread" + info: null + example: + - 30000000 + - 50000000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "long" + name: "--limitOutSAMoneReadBytes" + description: "max size of the SAM record (bytes) for one read. Recommended value:\ + \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" + info: null + example: + - 100000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--limitOutSJoneRead" + description: "max number of junctions for one read (including all multi-mappers)" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--limitOutSJcollapsed" + description: "max number of collapsed junctions" + info: null + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "long" + name: "--limitBAMsortRAM" + description: "maximum available RAM (bytes) for sorting BAM. If =0, it will be\ + \ set to the genome index size. 0 value can only be used with --genomeLoad NoSharedMemory\ + \ option." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--limitSjdbInsertNsj" + description: "maximum number of junctions to be inserted to the genome on the\ + \ fly at the mapping stage, including those from annotations and those detected\ + \ in the 1st step of the 2-pass run" + info: null + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--limitNreadsSoft" + description: "soft limit on the number of reads" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output: general" + arguments: + - type: "string" + name: "--outTmpKeep" + description: "whether to keep the temporary files after STAR runs is finished\n\ + \n- None ... remove all temporary files\n- All ... keep all files" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outStd" + description: "which output will be directed to stdout (standard out)\n\n- Log\ + \ ... log messages\n- SAM ... alignments\ + \ in SAM format (which normally are output to Aligned.out.sam file), normal\ + \ standard output will go into Log.std.out\n- BAM_Unsorted ... alignments\ + \ in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate\ + \ ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype\ + \ BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome\ + \ in BAM format, unsorted. Requires --quantMode TranscriptomeSAM" + info: null + example: + - "Log" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outReadsUnmapped" + description: "output of unmapped and partially mapped (i.e. mapped only one mate\ + \ of a paired end read) reads in separate file(s).\n\n- None ... no output\n\ + - Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--outQSconversionAdd" + description: "add this number to the quality score (e.g. to convert from Illumina\ + \ to Sanger, use -31)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outMultimapperOrder" + description: "order of multimapping alignments in the output files\n\n- Old_2.4\ + \ ... quasi-random order used before 2.5.0\n- Random \ + \ ... random order of alignments for each multi-mapper. Read mates (pairs)\ + \ are always adjacent, all alignment for each read stay together. This option\ + \ will become default in the future releases." + info: null + example: + - "Old_2.4" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output: SAM and BAM" + arguments: + - type: "string" + name: "--outSAMtype" + description: "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without\ + \ sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n\ + 2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate\ + \ ... sorted by coordinate. This option will allocate extra memory for sorting\ + \ which can be specified by --limitBAMsortRAM." + info: null + example: + - "SAM" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--outSAMmode" + description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\ + \ SAM output\n- NoQS ... full SAM but without quality scores" + info: null + example: + - "Full" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outSAMstrandField" + description: "Cufflinks-like strand field flag\n\n- None ... not used\n\ + - intronMotif ... strand derived from the intron motif. This option changes\ + \ the output alignments: reads with inconsistent and/or non-canonical introns\ + \ are filtered out." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outSAMattributes" + description: "a string of desired SAM attributes, in the order desired for the\ + \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\ + - None ... no attributes\n- Standard ... NH HI AS nM\n- All \ + \ ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ... number\ + \ of loci the reads maps to: =1 for unique mappers, >1 for multimappers. Standard\ + \ SAM tag.\n- HI ... multiple alignment index, starts with --outSAMattrIHstart\ + \ (=1 by default). Standard SAM tag.\n- AS ... local alignment score,\ + \ +1/-1 for matches/mismateches, score* penalties for indels and gaps. For PE\ + \ reads, total score for two mates. Stadnard SAM tag.\n- nM ... number\ + \ of mismatches. For PE reads, sum over two mates.\n- NM ... edit distance\ + \ to the reference (number of mismatched + inserted + deleted bases) for each\ + \ mate. Standard SAM tag.\n- MD ... string encoding mismatched and\ + \ deleted reference bases (see standard SAM specifications). Standard SAM tag.\n\ + - jM ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical;\ + \ 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions\ + \ database is used, and a junction is annotated, 20 is added to its motif value.\n\ + - jI ... start and end of introns for all junctions (1-based).\n- XS\ + \ ... alignment strand according to --outSAMstrandField.\n- MC \ + \ ... mate's CIGAR string. Standard SAM tag.\n- ch ... marks all\ + \ segment of all chimeric alingments for --chimOutType WithinBAM output.\n-\ + \ cN ... number of bases clipped from the read ends: 5' and 3'\n***Variation:\n\ + - vA ... variant allele\n- vG ... genomic coordinate of the\ + \ variant overlapped by the read.\n- vW ... 1 - alignment passes WASP\ + \ filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires\ + \ --waspOutputMode SAMtag.\n- ha ... haplotype (1/2) when mapping to\ + \ the diploid genome. Requires genome generated with --genomeTransformType Diploid\ + \ .\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of cell barcodes\ + \ and UMIs for the solo* demultiplexing.\n- GX GN ... gene ID and gene\ + \ name for unique-gene reads.\n- gx gn ... gene IDs and gene names for\ + \ unique- and multi-gene reads.\n- CB UB ... error-corrected cell barcodes\ + \ and UMIs for solo* demultiplexing. Requires --outSAMtype BAM SortedByCoordinate.\n\ + - sM ... assessment of CB and UMI.\n- sS ... sequence of the\ + \ entire barcode (CB,UMI,adapter).\n- sQ ... quality of the entire\ + \ barcode.\n- sF ... type of feature overlap and number of features\ + \ for each alignment\n***Unsupported/undocumented:\n- rB ... alignment\ + \ block read/genomic coordinates.\n- vR ... read coordinate of the\ + \ variant." + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--outSAMattrIHstart" + description: "start value for the IH attribute. 0 may be required by some downstream\ + \ software, such as Cufflinks or StringTie." + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outSAMunmapped" + description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\ + \ ... no output\n- Within ... output unmapped reads within the main SAM file\ + \ (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped mate for\ + \ each alignment, and, in case of unsorted output, keep it adjacent to its mapped\ + \ mate. Only affects multi-mapping reads." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--outSAMorder" + description: "type of sorting for the SAM output\n\nPaired: one mate after the\ + \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\ + \ other for all paired alignments, the order is kept the same as in the input\ + \ FASTQ files" + info: null + example: + - "Paired" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outSAMprimaryFlag" + description: "which alignments are considered primary - all others will be marked\ + \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with the\ + \ best score is primary\n- AllBestScore ... all alignments with the best score\ + \ are primary" + info: null + example: + - "OneBestScore" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outSAMreadID" + description: "read ID record type\n\n- Standard ... first word (until space) from\ + \ the FASTx read ID line, removing /1,/2 from the end\n- Number ... read number\ + \ (index) in the FASTx file" + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--outSAMmapqUnique" + description: "0 to 255: the MAPQ value for unique mappers" + info: null + example: + - 255 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--outSAMflagOR" + description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\ + \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by\ + \ STAR, and after outSAMflagAND. Can be used to set specific bits that are not\ + \ set otherwise." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--outSAMflagAND" + description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\ + \ FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set by\ + \ STAR, but before outSAMflagOR. Can be used to unset specific bits that are\ + \ not set otherwise." + info: null + example: + - 65535 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outSAMattrRGline" + description: "SAM/BAM read group line. The first word contains the read group\ + \ identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx CN:yy\ + \ \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment. Any\ + \ spaces in the tag values have to be double quoted.\nComma separated RG lines\ + \ correspons to different (comma separated) input files in --readFilesIn. Commas\ + \ have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx , ID:zzz\ + \ \"DS:z z\" , ID:yyy DS:yyyy" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--outSAMheaderHD" + description: "@HD (header) line of the SAM header" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--outSAMheaderPG" + description: "extra @PG (software) line of the SAM header (in addition to STAR)" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--outSAMheaderCommentFile" + description: "path to the file with @CO (comment) lines of the SAM header" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outSAMfilter" + description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\ + \ ... only keep the reads for which all alignments are to the extra reference\ + \ sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences\ + \ ... keep all alignments to the extra reference sequences added with --genomeFastaFiles\ + \ at the mapping stage." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--outSAMmultNmax" + description: "max number of multiple alignments for a read that will be output\ + \ to the SAM/BAM files. Note that if this value is not equal to -1, the top\ + \ scoring alignment will be output first\n\n- -1 ... all alignments (up to --outFilterMultimapNmax)\ + \ will be output" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--outSAMtlen" + description: "calculation method for the TLEN field in the SAM/BAM files\n\n-\ + \ 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\ + \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\ + \ base of any mate. (+)sign for the mate with the leftmost base. This is different\ + \ from 1 for overlapping mates with protruding ends" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--outBAMcompression" + description: "-1 to 10 BAM compression level, -1=default compression (6?), 0=no\ + \ compression, 10=maximum compression" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--outBAMsortingThreadN" + description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--outBAMsortingBinsN" + description: ">0: number of genome bins for coordinate-sorting" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "BAM processing" + arguments: + - type: "string" + name: "--bamRemoveDuplicatesType" + description: "mark duplicates in the BAM file, for now only works with (i) sorted\ + \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- -\ + \ ... no duplicate removal/marking\n- UniqueIdentical\ + \ ... mark all multimappers, and duplicate unique mappers. The coordinates,\ + \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate\ + \ unique mappers but not multimappers." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bamRemoveDuplicatesMate2basesN" + description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\ + \ for RAMPAGE)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output Wiggle" + arguments: + - type: "string" + name: "--outWigType" + description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\ + . Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n\ + - None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle\ + \ ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of\ + \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from only\ + \ 2nd read" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--outWigStrand" + description: "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate\ + \ strands, str1 and str2\n- Unstranded ... collapsed strands" + info: null + example: + - "Stranded" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outWigReferencesPrefix" + description: "prefix matching reference names to include in the output wiggle\ + \ file, e.g. \"chr\", default \"-\" - include all references" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outWigNorm" + description: "type of normalization for the signal\n\n- RPM ... reads per million\ + \ of mapped reads\n- None ... no normalization, \"raw\" counts" + info: null + example: + - "RPM" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output Filtering" + arguments: + - type: "string" + name: "--outFilterType" + description: "type of filtering\n\n- Normal ... standard filtering using only\ + \ current alignment\n- BySJout ... keep only those reads that contain junctions\ + \ that passed filtering into SJ.out.tab" + info: null + example: + - "Normal" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--outFilterMultimapScoreRange" + description: "the score range below the maximum score for multimapping alignments" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--outFilterMultimapNmax" + description: "maximum number of loci the read is allowed to map to. Alignments\ + \ (all of them) will be output only if the read maps to no more loci than this\ + \ value.\n\nOtherwise no alignments will be output, and the read will be counted\ + \ as \"mapped to too many loci\" in the Log.final.out ." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--outFilterMismatchNmax" + description: "alignment will be output only if it has no more mismatches than\ + \ this value." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--outFilterMismatchNoverLmax" + description: "alignment will be output only if its ratio of mismatches to *mapped*\ + \ length is less than or equal to this value." + info: null + example: + - 0.3 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--outFilterMismatchNoverReadLmax" + description: "alignment will be output only if its ratio of mismatches to *read*\ + \ length is less than or equal to this value." + info: null + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--outFilterScoreMin" + description: "alignment will be output only if its score is higher than or equal\ + \ to this value." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--outFilterScoreMinOverLread" + description: "same as outFilterScoreMin, but normalized to read length (sum of\ + \ mates' lengths for paired-end reads)" + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--outFilterMatchNmin" + description: "alignment will be output only if the number of matched bases is\ + \ higher than or equal to this value." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--outFilterMatchNminOverLread" + description: "sam as outFilterMatchNmin, but normalized to the read length (sum\ + \ of mates' lengths for paired-end reads)." + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outFilterIntronMotifs" + description: "filter alignment using their motifs\n\n- None \ + \ ... no filtering\n- RemoveNoncanonical ... filter out\ + \ alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\ + \ ... filter out alignments that contain non-canonical unannotated junctions\ + \ when using annotated splice junctions database. The annotated non-canonical\ + \ junctions will be kept." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--outFilterIntronStrands" + description: "filter alignments\n\n- RemoveInconsistentStrands ... remove\ + \ alignments that have junctions with inconsistent strands\n- None \ + \ ... no filtering" + info: null + example: + - "RemoveInconsistentStrands" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output splice junctions (SJ.out.tab)" + arguments: + - type: "string" + name: "--outSJtype" + description: "type of splice junction output\n\n- Standard ... standard SJ.out.tab\ + \ output\n- None ... no splice junction output" + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output Filtering: Splice Junctions" + arguments: + - type: "string" + name: "--outSJfilterReads" + description: "which reads to consider for collapsed splice junctions output\n\n\ + - All ... all reads, unique- and multi-mappers\n- Unique ... uniquely mapping\ + \ reads only" + info: null + example: + - "All" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--outSJfilterOverhangMin" + description: "minimum overhang length for splice junctions on both sides for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif,\ + \ (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\ndoes not apply\ + \ to annotated junctions" + info: null + example: + - 30 + - 12 + - 12 + - 12 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--outSJfilterCountUniqueMin" + description: "minimum uniquely mapping read count per junction for: (1) non-canonical\ + \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and\ + \ GT/AT motif. -1 means no output for that motif\n\nJunctions are output if\ + \ one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are\ + \ satisfied\ndoes not apply to annotated junctions" + info: null + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--outSJfilterCountTotalMin" + description: "minimum total (multi-mapping+unique) read count per junction for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif,\ + \ (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions\ + \ are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\ + \ conditions are satisfied\ndoes not apply to annotated junctions" + info: null + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--outSJfilterDistToOtherSJmin" + description: "minimum allowed distance to other junctions' donor/acceptor\n\n\ + does not apply to annotated junctions" + info: null + example: + - 10 + - 0 + - 5 + - 10 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--outSJfilterIntronMaxVsReadN" + description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ + \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by 2\ + \ reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap <=alignIntronMax\n\ + does not apply to annotated junctions" + info: null + example: + - 50000 + - 100000 + - 200000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Scoring" + arguments: + - type: "integer" + name: "--scoreGap" + description: "splice junction penalty (independent on intron motif)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--scoreGapNoncan" + description: "non-canonical junction penalty (in addition to scoreGap)" + info: null + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--scoreGapGCAG" + description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)" + info: null + example: + - -4 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--scoreGapATAC" + description: "AT/AC and GT/AT junction penalty (in addition to scoreGap)" + info: null + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--scoreGenomicLengthLog2scale" + description: "extra score logarithmically scaled with genomic length of the alignment:\ + \ scoreGenomicLengthLog2scale*log2(genomicLength)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--scoreDelOpen" + description: "deletion open penalty" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--scoreDelBase" + description: "deletion extension penalty per base (in addition to scoreDelOpen)" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--scoreInsOpen" + description: "insertion open penalty" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--scoreInsBase" + description: "insertion extension penalty per base (in addition to scoreInsOpen)" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--scoreStitchSJshift" + description: "maximum score reduction while searching for SJ boundaries in the\ + \ stitching step" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Alignments and Seeding" + arguments: + - type: "integer" + name: "--seedSearchStartLmax" + description: "defines the search start point through the read - the read is split\ + \ into pieces no longer than this value" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--seedSearchStartLmaxOverLread" + description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\ + \ for paired-end reads)" + info: null + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seedSearchLmax" + description: "defines the maximum length of the seeds, if =0 seed length is not\ + \ limited" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seedMultimapNmax" + description: "only pieces that map fewer than this value are utilized in the stitching\ + \ procedure" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seedPerReadNmax" + description: "max number of seeds per read" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seedPerWindowNmax" + description: "max number of seeds per window" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seedNoneLociPerWindow" + description: "max number of one seed loci per window" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seedSplitMin" + description: "min length of the seed sequences split by Ns or mate gap" + info: null + example: + - 12 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seedMapMin" + description: "min length of seeds to be mapped" + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--alignIntronMin" + description: "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin,\ + \ otherwise it is considered Deletion" + info: null + example: + - 21 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--alignIntronMax" + description: "maximum intron size, if 0, max intron size will be determined by\ + \ (2^winBinNbits)*winAnchorDistNbins" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--alignMatesGapMax" + description: "maximum gap between two mates, if 0, max intron gap will be determined\ + \ by (2^winBinNbits)*winAnchorDistNbins" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--alignSJoverhangMin" + description: "minimum overhang (i.e. block size) for spliced alignments" + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--alignSJstitchMismatchNmax" + description: "maximum number of mismatches for stitching of the splice junctions\ + \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3)\ + \ GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." + info: null + example: + - 0 + - -1 + - 0 + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--alignSJDBoverhangMin" + description: "minimum overhang (i.e. block size) for annotated (sjdb) spliced\ + \ alignments" + info: null + example: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--alignSplicedMateMapLmin" + description: "minimum mapped length for a read mate that is spliced" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--alignSplicedMateMapLminOverLmate" + description: "alignSplicedMateMapLmin normalized to mate length" + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--alignWindowsPerReadNmax" + description: "max number of windows per read" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--alignTranscriptsPerWindowNmax" + description: "max number of transcripts per window" + info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--alignTranscriptsPerReadNmax" + description: "max number of different alignments per read to consider" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--alignEndsType" + description: "type of read ends alignment\n\n- Local ... standard\ + \ local alignment with soft-clipping allowed\n- EndToEnd ... force\ + \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully\ + \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\ + \ ... fully extend only the 5p of the both read1 and read2, all other ends:\ + \ local alignment" + info: null + example: + - "Local" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--alignEndsProtrude" + description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\ + \ mate downstream of the start (end) of the -strand mate\n\n1st word: int: maximum\ + \ number of protrusion bases allowed\n2nd word: string:\n- \ + \ ConcordantPair ... report alignments with non-zero protrusion as concordant\ + \ pairs\n- DiscordantPair ... report alignments with non-zero\ + \ protrusion as discordant pairs" + info: null + example: + - "0 ConcordantPair" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--alignSoftClipAtReferenceEnds" + description: "allow the soft-clipping of the alignments past the end of the chromosomes\n\ + \n- Yes ... allow\n- No ... prohibit, useful for compatibility with Cufflinks" + info: null + example: + - "Yes" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--alignInsertionFlush" + description: "how to flush ambiguous insertion positions\n\n- None ... insertions\ + \ are not flushed\n- Right ... insertions are flushed to the right" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Paired-End reads" + arguments: + - type: "integer" + name: "--peOverlapNbasesMin" + description: "minimum number of overlapping bases to trigger mates merging and\ + \ realignment. Specify >0 value to switch on the \"merginf of overlapping mates\"\ + \ algorithm." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--peOverlapMMp" + description: "maximum proportion of mismatched bases in the overlap area" + info: null + example: + - 0.01 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Windows, Anchors, Binning" + arguments: + - type: "integer" + name: "--winAnchorMultimapNmax" + description: "max number of loci anchors are allowed to map to" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--winBinNbits" + description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\ + \ each window will occupy an integer number of bins." + info: null + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--winAnchorDistNbins" + description: "max number of bins between two anchors that allows aggregation of\ + \ anchors into one window" + info: null + example: + - 9 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--winFlankNbins" + description: "log2(winFlank), where win Flank is the size of the left and right\ + \ flanking regions for each window" + info: null + example: + - 4 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--winReadCoverageRelativeMin" + description: "minimum relative coverage of the read sequence by the seeds in a\ + \ window, for STARlong algorithm only." + info: null + example: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--winReadCoverageBasesMin" + description: "minimum number of bases covered by the seeds in a window , for STARlong\ + \ algorithm only." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Chimeric Alignments" + arguments: + - type: "string" + name: "--chimOutType" + description: "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n\ + - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n-\ + \ WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n-\ + \ WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental\ + \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\ + \ ... soft-clipping in the CIGAR for supplemental chimeric alignments" + info: null + example: + - "Junctions" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--chimSegmentMin" + description: "minimum length of chimeric segment length, if ==0, no chimeric output" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chimScoreMin" + description: "minimum total (summed) score of the chimeric segments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chimScoreDropMax" + description: "max drop (difference) of chimeric score (the sum of scores of all\ + \ chimeric segments) from the read length" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chimScoreSeparation" + description: "minimum difference (separation) between the best chimeric score\ + \ and the next one" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chimScoreJunctionNonGTAG" + description: "penalty for a non-GT/AG chimeric junction" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chimJunctionOverhangMin" + description: "minimum overhang for a chimeric junction" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chimSegmentReadGapMax" + description: "maximum gap in the read sequence between chimeric segments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--chimFilter" + description: "different filters for chimeric alignments\n\n- None ... no filtering\n\ + - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\ + \ junction" + info: null + example: + - "banGenomicN" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--chimMainSegmentMultNmax" + description: "maximum number of multi-alignments for the main chimeric segment.\ + \ =1 will prohibit multimapping main segments." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chimMultimapNmax" + description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the old\ + \ scheme for chimeric detection which only considered unique alignments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chimMultimapScoreRange" + description: "the score range for multi-mapping chimeras below the best chimeric\ + \ score. Only works with --chimMultimapNmax > 1" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chimNonchimScoreDropMin" + description: "to trigger chimeric detection, the drop in the best non-chimeric\ + \ alignment score with respect to the read length has to be greater than this\ + \ value" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--chimOutJunctionFormat" + description: "formatting type for the Chimeric.out.junction file\n\n- 0 ... no\ + \ comment lines/headers\n- 1 ... comment lines at the end of the file: command\ + \ line and Nreads: total, unique/multi-mapping" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Quantification of Annotations" + arguments: + - type: "string" + name: "--quantMode" + description: "types of quantification requested\n\n- - ... none\n\ + - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\ + \ file\n- GeneCounts ... count reads per gene" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--quantTranscriptomeBAMcompression" + description: "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no BAM\ + \ output\n- -1 ... default compression (6?)\n- 0 ... no compression\n- 10\ + \ ... maximum compression" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--quantTranscriptomeSAMoutput" + description: "alignment filtering for TranscriptomeSAM output\n\n- BanSingleEnd_BanIndels_ExtendSoftclip\ + \ ... prohibit indels and single-end alignments, extend softclips - compatible\ + \ with RSEM\n- BanSingleEnd ... prohibit single-end alignments,\ + \ allow indels and softclips\n- BanSingleEnd_ExtendSoftclip ... prohibit single-end\ + \ alignments, extend softclips, allow indels" + info: null + example: + - "BanSingleEnd_BanIndels_ExtendSoftclip" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "2-pass Mapping" + arguments: + - type: "string" + name: "--twopassMode" + description: "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic\ + \ ... basic 2-pass mapping, with all 1st pass junctions inserted into\ + \ the genome indices on the fly" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--twopass1readsN" + description: "number of reads to process for the 1st step. Use very large number\ + \ (or default -1) to map all reads in the first step." + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "WASP parameters" + arguments: + - type: "string" + name: "--waspOutputMode" + description: "WASP allele-specific output type. This is re-implementation of the\ + \ original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\ + \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper: Nature\ + \ Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582 .\n\ + \n- SAMtag ... add WASP tags to the alignments that pass WASP filtering" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "--readFilesIn" + description: "The single-end or paired-end R1 FastQ files to be processed." + info: null + example: + - "mysample_S1_L001_R1_001.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--input_r2" + description: "The paired-end R2 FastQ files to be processed. Only required if\ + \ --input is a paired-end R1 file." + info: null + example: + - "mysample_S1_L001_R2_001.fastq.gz" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--aligned_reads" + description: "The output file containing the aligned reads." + info: null + example: + - "aligned_reads.bam" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--reads_per_gene" + description: "The output file containing the number of reads per gene." + info: null + example: + - "reads_per_gene.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--unmapped" + description: "The output file containing the unmapped reads." + info: null + example: + - "unmapped.fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--unmapped_r2" + description: "The output file containing the unmapped R2 reads." + info: null + example: + - "unmapped_r2.fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chimeric_junctions" + description: "The output file containing the chimeric junctions." + info: null + example: + - "chimeric_junctions.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--log" + description: "The output file containing the log of the alignment process." + info: null + example: + - "log.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--splice_junctions" + description: "The output file containing the splice junctions." + info: null + example: + - "splice_junctions.tsv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "python_script" + path: "script.py" + is_executable: true +description: "Aligns reads to a reference genome using STAR.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +requirements: + commands: + - "ps" +keywords: +- "align" +- "fasta" +- "genome" +license: "MIT" +references: + doi: + - "10.1093/bioinformatics/bts635" +links: + repository: "https://github.com/alexdobin/STAR" + documentation: "https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "python:3.12-slim" + target_registry: "images.viash-hub.com" + target_tag: "v0.1.0" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "procps" + - "gzip" + - "bzip2" + interactive: false + - type: "docker" + run: + - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ + \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ + \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ + \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ + \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ + \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" + env: + - "STAR_VERSION 2.7.11b" + - "PACKAGES gcc g++ make wget zlib1g-dev unzip xxd" + - type: "docker" + run: + - "STAR --version | sed 's#\\(.*\\)#star: \"\\1\"#' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/star/star_align_reads/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/star/star_align_reads" + executable: "target/nextflow/star/star_align_reads/main.nf" + viash_version: "0.9.0-RC6" + git_commit: "b84b29747d0635f2ac83ea63b496be9a9edb6724" + git_remote: "https://github.com/viash-hub/biobox" +package_config: + name: "biobox" + version: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null + viash_version: "0.9.0-RC6" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.1.0'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/star/star_align_reads/main.nf b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/star/star_align_reads/main.nf new file mode 100644 index 0000000..10b0c96 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/star/star_align_reads/main.nf @@ -0,0 +1,5812 @@ +// star_align_reads v0.1.0 +// +// This wrapper script is auto-generated by viash 0.9.0-RC6 and is thus a +// derivative work thereof. This software comes with ABSOLUTELY NO WARRANTY from +// Data Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value instanceof String) { + try { + value = value.toInteger() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigInteger) { + value = value.intValue() + } + expectedClass = value instanceof Integer ? null : "Integer" + } else if (par.type == "long") { + // cast to long if need be + if (value instanceof String) { + try { + value = value.toLong() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof Integer) { + value = value.toLong() + } + expectedClass = value instanceof Long ? null : "Long" + } else if (par.type == "double") { + // cast to double if need be + if (value instanceof String) { + try { + value = value.toDouble() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigDecimal) { + value = value.doubleValue() + } + if (value instanceof Float) { + value = value.toDouble() + } + expectedClass = value instanceof Double ? null : "Double" + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value instanceof String) { + def valueLower = value.toLowerCase() + if (valueLower == "true") { + value = true + } else if (valueLower == "false") { + value = false + } + } + expectedClass = value instanceof Boolean ? null : "Boolean" + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required) { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _processOutputValues(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(";") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey,newKey:oldKey'" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{[yamlFile] + outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ +mkdir -p "\$(dirname '${yamlFile}')" +echo "Storing state as yaml" +echo '${yamlBlob}' > '${yamlFile}' +echo "Copying output files to destination folder" +${copyCommands.join("\n ")} +""" +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (key, value) are the tuples that will be saved to the state.yaml file + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = val instanceof File ? val.toPath() : val + [value: value_, inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + def chModifiedFiltered = workflowArgs.filter ? + chModified | filter{workflowArgs.filter(it)} : + chModified + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModifiedFiltered.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModifiedFiltered + chPassthrough = Channel.empty() + } + + def chArgs = workflowArgs.fromState ? + chRun | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRun | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutput = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + // check output tuple + | map { id_, output_ -> + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _processOutputValues(output_, meta.config, id_, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { + output_ = output_.values()[0] + } + + [join_id, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chInitialOutput, chModifiedFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublish = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublish, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + + // remove join_id and meta + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "star_align_reads", + "namespace" : "star", + "version" : "v0.1.0", + "argument_groups" : [ + { + "name" : "Run Parameters", + "arguments" : [ + { + "type" : "integer", + "name" : "--runRNGseed", + "description" : "random number generator seed.", + "example" : [ + 777 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Genome Parameters", + "arguments" : [ + { + "type" : "file", + "name" : "--genomeDir", + "description" : "path to the directory where genome files are stored (for --runMode alignReads) or will be generated (for --runMode generateGenome)", + "example" : [ + "./GenomeDir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--genomeLoad", + "description" : "mode of shared memory usage for the genome files. Only used with --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and keep it in memory after run\n- LoadAndRemove ... load genome into shared but remove it after run\n- LoadAndExit ... load genome into shared memory and exit, keeping the genome in memory for future runs\n- Remove ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory ... do not use shared memory, each job will have its own private copy of the genome", + "example" : [ + "NoSharedMemory" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genomeFastaFiles", + "description" : "path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.\n\nRequired for the genome generation (--runMode genomeGenerate). Can also be used in the mapping (--runMode alignReads) to add extra (new) sequences to the genome (e.g. spike-ins).", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--genomeFileSizes", + "description" : "genome files exact sizes in bytes. Typically, this should not be defined by the user.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--genomeTransformOutput", + "description" : "which output to transform back to original genome\n\n- SAM ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n- Quant ... quantifications (from --quantMode option)\n- None ... no transformation of the output", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--genomeChrSetMitochondrial", + "description" : "names of the mitochondrial chromosomes. Presently only used for STARsolo statistics output/", + "example" : [ + "chrM", + "M", + "MT" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Splice Junctions Database", + "arguments" : [ + { + "type" : "string", + "name" : "--sjdbFileChrStartEnd", + "description" : "path to the files with genomic coordinates (chr start end strand) for the splice junction introns. Multiple files can be supplied and will be concatenated.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sjdbGTFfile", + "description" : "path to the GTF file with annotations", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdbGTFchrPrefix", + "description" : "prefix for chromosome names in a GTF file (e.g. 'chr' for using ENSMEBL annotations with UCSC genomes)", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdbGTFfeatureExon", + "description" : "feature type in GTF file to be used as exons for building transcripts", + "example" : [ + "exon" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentTranscript", + "description" : "GTF attribute name for parent transcript ID (default \\"transcript_id\\" works for GTF files)", + "example" : [ + "transcript_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentGene", + "description" : "GTF attribute name for parent gene ID (default \\"gene_id\\" works for GTF files)", + "example" : [ + "gene_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentGeneName", + "description" : "GTF attribute name for parent gene name", + "example" : [ + "gene_name" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentGeneType", + "description" : "GTF attribute name for parent gene type", + "example" : [ + "gene_type", + "gene_biotype" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--sjdbOverhang", + "description" : "length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)", + "example" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--sjdbScore", + "description" : "extra alignment score for alignments that cross database junctions", + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sjdbInsertSave", + "description" : "which files to save when sjdb junctions are inserted on the fly at the mapping step\n\n- Basic ... only small junction / transcript files\n- All ... all files including big Genome, SA and SAindex - this will create a complete genome directory", + "example" : [ + "Basic" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Variation parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--varVCFfile", + "description" : "path to the VCF file that contains variation data. The 10th column should contain the genotype information, e.g. 0/1", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Read Parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--readFilesType", + "description" : "format of input read files\n\n- Fastx ... FASTA or FASTQ\n- SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use --readFilesCommand samtools view", + "example" : [ + "Fastx" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--readFilesSAMattrKeep", + "description" : "for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n- None ... do not keep any tags", + "example" : [ + "All" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--readFilesManifest", + "description" : "path to the \\"manifest\\" file with the names of read files. The manifest file should contain 3 tab-separated columns:\n\npaired-end reads: read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces, but not tabs are allowed in file names.\nIf read_group_line does not start with ID:, it can only contain one ID field, and ID: will be added to it.\nIf read_group_line starts with ID:, it can contain several fields separated by $tab$, and all fields will be be copied verbatim into SAM @RG header line.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--readFilesPrefix", + "description" : "prefix for the read files names, i.e. it will be added in front of the strings in --readFilesIn", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--readFilesCommand", + "description" : "command line to execute for each of the input file. This command should generate FASTA or FASTQ text and send it to stdout\n\nFor example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--readMapNumber", + "description" : "number of reads to map from the beginning of the file\n\n-1: map all reads", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--readMatesLengthsIn", + "description" : "Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same. NotEqual is safe in all situations.", + "example" : [ + "NotEqual" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--readNameSeparator", + "description" : "character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed)", + "example" : [ + "/" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--readQualityScoreBase", + "description" : "number to be subtracted from the ASCII code to get Phred quality score", + "example" : [ + 33 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Read Clipping", + "arguments" : [ + { + "type" : "string", + "name" : "--clipAdapterType", + "description" : "adapter clipping type\n\n- Hamming ... adapter clipping based on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n- CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None ... no adapter clipping, all other clip* parameters are disregarded", + "example" : [ + "Hamming" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--clip3pNbases", + "description" : "number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--clip3pAdapterSeq", + "description" : "adapter sequences to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.\n\n- polyA ... polyA sequence with the length equal to read length", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--clip3pAdapterMMp", + "description" : "max proportion of mismatches for 3p adapter clipping for each mate. If one value is given, it will be assumed the same for both mates.", + "example" : [ + 0.1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--clip3pAfterAdapterNbases", + "description" : "number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--clip5pNbases", + "description" : "number(s) of bases to clip from 5p of each mate. If one value is given, it will be assumed the same for both mates.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Limits", + "arguments" : [ + { + "type" : "long", + "name" : "--limitGenomeGenerateRAM", + "description" : "maximum available RAM (bytes) for genome generation", + "example" : [ + 31000000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "long", + "name" : "--limitIObufferSize", + "description" : "max available buffers size (bytes) for input/output, per thread", + "example" : [ + 30000000, + 50000000 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "long", + "name" : "--limitOutSAMoneReadBytes", + "description" : "max size of the SAM record (bytes) for one read. Recommended value: >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax", + "example" : [ + 100000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--limitOutSJoneRead", + "description" : "max number of junctions for one read (including all multi-mappers)", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--limitOutSJcollapsed", + "description" : "max number of collapsed junctions", + "example" : [ + 1000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "long", + "name" : "--limitBAMsortRAM", + "description" : "maximum available RAM (bytes) for sorting BAM. If =0, it will be set to the genome index size. 0 value can only be used with --genomeLoad NoSharedMemory option.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--limitSjdbInsertNsj", + "description" : "maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run", + "example" : [ + 1000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--limitNreadsSoft", + "description" : "soft limit on the number of reads", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output: general", + "arguments" : [ + { + "type" : "string", + "name" : "--outTmpKeep", + "description" : "whether to keep the temporary files after STAR runs is finished\n\n- None ... remove all temporary files\n- All ... keep all files", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outStd", + "description" : "which output will be directed to stdout (standard out)\n\n- Log ... log messages\n- SAM ... alignments in SAM format (which normally are output to Aligned.out.sam file), normal standard output will go into Log.std.out\n- BAM_Unsorted ... alignments in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome in BAM format, unsorted. Requires --quantMode TranscriptomeSAM", + "example" : [ + "Log" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outReadsUnmapped", + "description" : "output of unmapped and partially mapped (i.e. mapped only one mate of a paired end read) reads in separate file(s).\n\n- None ... no output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outQSconversionAdd", + "description" : "add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31)", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outMultimapperOrder", + "description" : "order of multimapping alignments in the output files\n\n- Old_2.4 ... quasi-random order used before 2.5.0\n- Random ... random order of alignments for each multi-mapper. Read mates (pairs) are always adjacent, all alignment for each read stay together. This option will become default in the future releases.", + "example" : [ + "Old_2.4" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output: SAM and BAM", + "arguments" : [ + { + "type" : "string", + "name" : "--outSAMtype", + "description" : "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate ... sorted by coordinate. This option will allocate extra memory for sorting which can be specified by --limitBAMsortRAM.", + "example" : [ + "SAM" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outSAMmode", + "description" : "mode of SAM output\n\n- None ... no SAM output\n- Full ... full SAM output\n- NoQS ... full SAM but without quality scores", + "example" : [ + "Full" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outSAMstrandField", + "description" : "Cufflinks-like strand field flag\n\n- None ... not used\n- intronMotif ... strand derived from the intron motif. This option changes the output alignments: reads with inconsistent and/or non-canonical introns are filtered out.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outSAMattributes", + "description" : "a string of desired SAM attributes, in the order desired for the output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n- None ... no attributes\n- Standard ... NH HI AS nM\n- All ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ... number of loci the reads maps to: =1 for unique mappers, >1 for multimappers. Standard SAM tag.\n- HI ... multiple alignment index, starts with --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ... local alignment score, +1/-1 for matches/mismateches, score* penalties for indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n- nM ... number of mismatches. For PE reads, sum over two mates.\n- NM ... edit distance to the reference (number of mismatched + inserted + deleted bases) for each mate. Standard SAM tag.\n- MD ... string encoding mismatched and deleted reference bases (see standard SAM specifications). Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.\n- jI ... start and end of introns for all junctions (1-based).\n- XS ... alignment strand according to --outSAMstrandField.\n- MC ... mate's CIGAR string. Standard SAM tag.\n- ch ... marks all segment of all chimeric alingments for --chimOutType WithinBAM output.\n- cN ... number of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA ... variant allele\n- vG ... genomic coordinate of the variant overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode SAMtag.\n- ha ... haplotype (1/2) when mapping to the diploid genome. Requires genome generated with --genomeTransformType Diploid .\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ... gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ ... quality of the entire barcode.\n- sF ... type of feature overlap and number of features for each alignment\n***Unsupported/undocumented:\n- rB ... alignment block read/genomic coordinates.\n- vR ... read coordinate of the variant.", + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outSAMattrIHstart", + "description" : "start value for the IH attribute. 0 may be required by some downstream software, such as Cufflinks or StringTie.", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outSAMunmapped", + "description" : "output of unmapped reads in the SAM format\n\n1st word:\n- None ... no output\n- Within ... output unmapped reads within the main SAM file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped mate for each alignment, and, in case of unsorted output, keep it adjacent to its mapped mate. Only affects multi-mapping reads.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outSAMorder", + "description" : "type of sorting for the SAM output\n\nPaired: one mate after the other for all paired alignments\nPairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files", + "example" : [ + "Paired" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outSAMprimaryFlag", + "description" : "which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with the best score is primary\n- AllBestScore ... all alignments with the best score are primary", + "example" : [ + "OneBestScore" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outSAMreadID", + "description" : "read ID record type\n\n- Standard ... first word (until space) from the FASTx read ID line, removing /1,/2 from the end\n- Number ... read number (index) in the FASTx file", + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outSAMmapqUnique", + "description" : "0 to 255: the MAPQ value for unique mappers", + "example" : [ + 255 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outSAMflagOR", + "description" : "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e. FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by STAR, and after outSAMflagAND. Can be used to set specific bits that are not set otherwise.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outSAMflagAND", + "description" : "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e. FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set by STAR, but before outSAMflagOR. Can be used to unset specific bits that are not set otherwise.", + "example" : [ + 65535 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outSAMattrRGline", + "description" : "SAM/BAM read group line. The first word contains the read group identifier and must start with \\"ID:\\", e.g. --outSAMattrRGline ID:xxx CN:yy \\"DS:z z z\\".\n\nxxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted.\nComma separated RG lines correspons to different (comma separated) input files in --readFilesIn. Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx , ID:zzz \\"DS:z z\\" , ID:yyy DS:yyyy", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outSAMheaderHD", + "description" : "@HD (header) line of the SAM header", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outSAMheaderPG", + "description" : "extra @PG (software) line of the SAM header (in addition to STAR)", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outSAMheaderCommentFile", + "description" : "path to the file with @CO (comment) lines of the SAM header", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outSAMfilter", + "description" : "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences ... only keep the reads for which all alignments are to the extra reference sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences ... keep all alignments to the extra reference sequences added with --genomeFastaFiles at the mapping stage.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outSAMmultNmax", + "description" : "max number of multiple alignments for a read that will be output to the SAM/BAM files. Note that if this value is not equal to -1, the top scoring alignment will be output first\n\n- -1 ... all alignments (up to --outFilterMultimapNmax) will be output", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outSAMtlen", + "description" : "calculation method for the TLEN field in the SAM/BAM files\n\n- 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate. (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign for the mate with the leftmost base. This is different from 1 for overlapping mates with protruding ends", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outBAMcompression", + "description" : "-1 to 10 BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outBAMsortingThreadN", + "description" : ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN).", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outBAMsortingBinsN", + "description" : ">0: number of genome bins for coordinate-sorting", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "BAM processing", + "arguments" : [ + { + "type" : "string", + "name" : "--bamRemoveDuplicatesType", + "description" : "mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- - ... no duplicate removal/marking\n- UniqueIdentical ... mark all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate unique mappers but not multimappers.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bamRemoveDuplicatesMate2basesN", + "description" : "number of bases from the 5' of mate 2 to use in collapsing (e.g. for RAMPAGE)", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output Wiggle", + "arguments" : [ + { + "type" : "string", + "name" : "--outWigType", + "description" : "type of signal output, e.g. \\"bedGraph\\" OR \\"bedGraph read1_5p\\". Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n- None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from only 2nd read", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outWigStrand", + "description" : "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate strands, str1 and str2\n- Unstranded ... collapsed strands", + "example" : [ + "Stranded" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outWigReferencesPrefix", + "description" : "prefix matching reference names to include in the output wiggle file, e.g. \\"chr\\", default \\"-\\" - include all references", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outWigNorm", + "description" : "type of normalization for the signal\n\n- RPM ... reads per million of mapped reads\n- None ... no normalization, \\"raw\\" counts", + "example" : [ + "RPM" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output Filtering", + "arguments" : [ + { + "type" : "string", + "name" : "--outFilterType", + "description" : "type of filtering\n\n- Normal ... standard filtering using only current alignment\n- BySJout ... keep only those reads that contain junctions that passed filtering into SJ.out.tab", + "example" : [ + "Normal" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outFilterMultimapScoreRange", + "description" : "the score range below the maximum score for multimapping alignments", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outFilterMultimapNmax", + "description" : "maximum number of loci the read is allowed to map to. Alignments (all of them) will be output only if the read maps to no more loci than this value.\n\nOtherwise no alignments will be output, and the read will be counted as \\"mapped to too many loci\\" in the Log.final.out .", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outFilterMismatchNmax", + "description" : "alignment will be output only if it has no more mismatches than this value.", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--outFilterMismatchNoverLmax", + "description" : "alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value.", + "example" : [ + 0.3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--outFilterMismatchNoverReadLmax", + "description" : "alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value.", + "example" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outFilterScoreMin", + "description" : "alignment will be output only if its score is higher than or equal to this value.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--outFilterScoreMinOverLread", + "description" : "same as outFilterScoreMin, but normalized to read length (sum of mates' lengths for paired-end reads)", + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outFilterMatchNmin", + "description" : "alignment will be output only if the number of matched bases is higher than or equal to this value.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--outFilterMatchNminOverLread", + "description" : "sam as outFilterMatchNmin, but normalized to the read length (sum of mates' lengths for paired-end reads).", + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outFilterIntronMotifs", + "description" : "filter alignment using their motifs\n\n- None ... no filtering\n- RemoveNoncanonical ... filter out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated ... filter out alignments that contain non-canonical unannotated junctions when using annotated splice junctions database. The annotated non-canonical junctions will be kept.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--outFilterIntronStrands", + "description" : "filter alignments\n\n- RemoveInconsistentStrands ... remove alignments that have junctions with inconsistent strands\n- None ... no filtering", + "example" : [ + "RemoveInconsistentStrands" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output splice junctions (SJ.out.tab)", + "arguments" : [ + { + "type" : "string", + "name" : "--outSJtype", + "description" : "type of splice junction output\n\n- Standard ... standard SJ.out.tab output\n- None ... no splice junction output", + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output Filtering: Splice Junctions", + "arguments" : [ + { + "type" : "string", + "name" : "--outSJfilterReads", + "description" : "which reads to consider for collapsed splice junctions output\n\n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely mapping reads only", + "example" : [ + "All" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outSJfilterOverhangMin", + "description" : "minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\ndoes not apply to annotated junctions", + "example" : [ + 30, + 12, + 12, + 12 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outSJfilterCountUniqueMin", + "description" : "minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", + "example" : [ + 3, + 1, + 1, + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outSJfilterCountTotalMin", + "description" : "minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", + "example" : [ + 3, + 1, + 1, + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outSJfilterDistToOtherSJmin", + "description" : "minimum allowed distance to other junctions' donor/acceptor\n\ndoes not apply to annotated junctions", + "example" : [ + 10, + 0, + 5, + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--outSJfilterIntronMaxVsReadN", + "description" : "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ni.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap <=alignIntronMax\ndoes not apply to annotated junctions", + "example" : [ + 50000, + 100000, + 200000 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Scoring", + "arguments" : [ + { + "type" : "integer", + "name" : "--scoreGap", + "description" : "splice junction penalty (independent on intron motif)", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--scoreGapNoncan", + "description" : "non-canonical junction penalty (in addition to scoreGap)", + "example" : [ + -8 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--scoreGapGCAG", + "description" : "GC/AG and CT/GC junction penalty (in addition to scoreGap)", + "example" : [ + -4 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--scoreGapATAC", + "description" : "AT/AC and GT/AT junction penalty (in addition to scoreGap)", + "example" : [ + -8 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--scoreGenomicLengthLog2scale", + "description" : "extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength)", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--scoreDelOpen", + "description" : "deletion open penalty", + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--scoreDelBase", + "description" : "deletion extension penalty per base (in addition to scoreDelOpen)", + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--scoreInsOpen", + "description" : "insertion open penalty", + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--scoreInsBase", + "description" : "insertion extension penalty per base (in addition to scoreInsOpen)", + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--scoreStitchSJshift", + "description" : "maximum score reduction while searching for SJ boundaries in the stitching step", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Alignments and Seeding", + "arguments" : [ + { + "type" : "integer", + "name" : "--seedSearchStartLmax", + "description" : "defines the search start point through the read - the read is split into pieces no longer than this value", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--seedSearchStartLmaxOverLread", + "description" : "seedSearchStartLmax normalized to read length (sum of mates' lengths for paired-end reads)", + "example" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seedSearchLmax", + "description" : "defines the maximum length of the seeds, if =0 seed length is not limited", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seedMultimapNmax", + "description" : "only pieces that map fewer than this value are utilized in the stitching procedure", + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seedPerReadNmax", + "description" : "max number of seeds per read", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seedPerWindowNmax", + "description" : "max number of seeds per window", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seedNoneLociPerWindow", + "description" : "max number of one seed loci per window", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seedSplitMin", + "description" : "min length of the seed sequences split by Ns or mate gap", + "example" : [ + 12 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seedMapMin", + "description" : "min length of seeds to be mapped", + "example" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--alignIntronMin", + "description" : "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion", + "example" : [ + 21 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--alignIntronMax", + "description" : "maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--alignMatesGapMax", + "description" : "maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--alignSJoverhangMin", + "description" : "minimum overhang (i.e. block size) for spliced alignments", + "example" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--alignSJstitchMismatchNmax", + "description" : "maximum number of mismatches for stitching of the splice junctions (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif.", + "example" : [ + 0, + -1, + 0, + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--alignSJDBoverhangMin", + "description" : "minimum overhang (i.e. block size) for annotated (sjdb) spliced alignments", + "example" : [ + 3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--alignSplicedMateMapLmin", + "description" : "minimum mapped length for a read mate that is spliced", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--alignSplicedMateMapLminOverLmate", + "description" : "alignSplicedMateMapLmin normalized to mate length", + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--alignWindowsPerReadNmax", + "description" : "max number of windows per read", + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--alignTranscriptsPerWindowNmax", + "description" : "max number of transcripts per window", + "example" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--alignTranscriptsPerReadNmax", + "description" : "max number of different alignments per read to consider", + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--alignEndsType", + "description" : "type of read ends alignment\n\n- Local ... standard local alignment with soft-clipping allowed\n- EndToEnd ... force end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12 ... fully extend only the 5p of the both read1 and read2, all other ends: local alignment", + "example" : [ + "Local" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--alignEndsProtrude", + "description" : "allow protrusion of alignment ends, i.e. start (end) of the +strand mate downstream of the start (end) of the -strand mate\n\n1st word: int: maximum number of protrusion bases allowed\n2nd word: string:\n- ConcordantPair ... report alignments with non-zero protrusion as concordant pairs\n- DiscordantPair ... report alignments with non-zero protrusion as discordant pairs", + "example" : [ + "0 ConcordantPair" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--alignSoftClipAtReferenceEnds", + "description" : "allow the soft-clipping of the alignments past the end of the chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility with Cufflinks", + "example" : [ + "Yes" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--alignInsertionFlush", + "description" : "how to flush ambiguous insertion positions\n\n- None ... insertions are not flushed\n- Right ... insertions are flushed to the right", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Paired-End reads", + "arguments" : [ + { + "type" : "integer", + "name" : "--peOverlapNbasesMin", + "description" : "minimum number of overlapping bases to trigger mates merging and realignment. Specify >0 value to switch on the \\"merginf of overlapping mates\\" algorithm.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--peOverlapMMp", + "description" : "maximum proportion of mismatched bases in the overlap area", + "example" : [ + 0.01 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Windows, Anchors, Binning", + "arguments" : [ + { + "type" : "integer", + "name" : "--winAnchorMultimapNmax", + "description" : "max number of loci anchors are allowed to map to", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--winBinNbits", + "description" : "=log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins.", + "example" : [ + 16 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--winAnchorDistNbins", + "description" : "max number of bins between two anchors that allows aggregation of anchors into one window", + "example" : [ + 9 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--winFlankNbins", + "description" : "log2(winFlank), where win Flank is the size of the left and right flanking regions for each window", + "example" : [ + 4 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--winReadCoverageRelativeMin", + "description" : "minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only.", + "example" : [ + 0.5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--winReadCoverageBasesMin", + "description" : "minimum number of bases covered by the seeds in a window , for STARlong algorithm only.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Chimeric Alignments", + "arguments" : [ + { + "type" : "string", + "name" : "--chimOutType", + "description" : "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n- SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n- WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n- WithinBAM HardClip ... (defa''' + '''ult) hard-clipping in the CIGAR for supplemental chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip ... soft-clipping in the CIGAR for supplemental chimeric alignments", + "example" : [ + "Junctions" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chimSegmentMin", + "description" : "minimum length of chimeric segment length, if ==0, no chimeric output", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chimScoreMin", + "description" : "minimum total (summed) score of the chimeric segments", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chimScoreDropMax", + "description" : "max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chimScoreSeparation", + "description" : "minimum difference (separation) between the best chimeric score and the next one", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chimScoreJunctionNonGTAG", + "description" : "penalty for a non-GT/AG chimeric junction", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chimJunctionOverhangMin", + "description" : "minimum overhang for a chimeric junction", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chimSegmentReadGapMax", + "description" : "maximum gap in the read sequence between chimeric segments", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--chimFilter", + "description" : "different filters for chimeric alignments\n\n- None ... no filtering\n- banGenomicN ... Ns are not allowed in the genome sequence around the chimeric junction", + "example" : [ + "banGenomicN" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chimMainSegmentMultNmax", + "description" : "maximum number of multi-alignments for the main chimeric segment. =1 will prohibit multimapping main segments.", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chimMultimapNmax", + "description" : "maximum number of chimeric multi-alignments\n\n- 0 ... use the old scheme for chimeric detection which only considered unique alignments", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chimMultimapScoreRange", + "description" : "the score range for multi-mapping chimeras below the best chimeric score. Only works with --chimMultimapNmax > 1", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chimNonchimScoreDropMin", + "description" : "to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--chimOutJunctionFormat", + "description" : "formatting type for the Chimeric.out.junction file\n\n- 0 ... no comment lines/headers\n- 1 ... comment lines at the end of the file: command line and Nreads: total, unique/multi-mapping", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Quantification of Annotations", + "arguments" : [ + { + "type" : "string", + "name" : "--quantMode", + "description" : "types of quantification requested\n\n- - ... none\n- TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate file\n- GeneCounts ... count reads per gene", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--quantTranscriptomeBAMcompression", + "description" : "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n- 10 ... maximum compression", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--quantTranscriptomeSAMoutput", + "description" : "alignment filtering for TranscriptomeSAM output\n\n- BanSingleEnd_BanIndels_ExtendSoftclip ... prohibit indels and single-end alignments, extend softclips - compatible with RSEM\n- BanSingleEnd ... prohibit single-end alignments, allow indels and softclips\n- BanSingleEnd_ExtendSoftclip ... prohibit single-end alignments, extend softclips, allow indels", + "example" : [ + "BanSingleEnd_BanIndels_ExtendSoftclip" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "2-pass Mapping", + "arguments" : [ + { + "type" : "string", + "name" : "--twopassMode", + "description" : "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic ... basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--twopass1readsN", + "description" : "number of reads to process for the 1st step. Use very large number (or default -1) to map all reads in the first step.", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "WASP parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--waspOutputMode", + "description" : "WASP allele-specific output type. This is re-implementation of the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582 .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "--readFilesIn" + ], + "description" : "The single-end or paired-end R1 FastQ files to be processed.", + "example" : [ + "mysample_S1_L001_R1_001.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input_r2", + "description" : "The paired-end R2 FastQ files to be processed. Only required if --input is a paired-end R1 file.", + "example" : [ + "mysample_S1_L001_R2_001.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--aligned_reads", + "description" : "The output file containing the aligned reads.", + "example" : [ + "aligned_reads.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--reads_per_gene", + "description" : "The output file containing the number of reads per gene.", + "example" : [ + "reads_per_gene.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--unmapped", + "description" : "The output file containing the unmapped reads.", + "example" : [ + "unmapped.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--unmapped_r2", + "description" : "The output file containing the unmapped R2 reads.", + "example" : [ + "unmapped_r2.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--chimeric_junctions", + "description" : "The output file containing the chimeric junctions.", + "example" : [ + "chimeric_junctions.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--log", + "description" : "The output file containing the log of the alignment process.", + "example" : [ + "log.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--splice_junctions", + "description" : "The output file containing the splice junctions.", + "example" : [ + "splice_junctions.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true + } + ], + "description" : "Aligns reads to a reference genome using STAR.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "align", + "fasta", + "genome" + ], + "license" : "MIT", + "references" : { + "doi" : [ + "10.1093/bioinformatics/bts635" + ] + }, + "links" : { + "repository" : "https://github.com/alexdobin/STAR", + "documentation" : "https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.12-slim", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.1.0", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps", + "gzip", + "bzip2" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\n apt-get install -y --no-install-recommends ${PACKAGES} && \\\\\n cd /tmp && \\\\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\n unzip ${STAR_VERSION}.zip && \\\\\n cd STAR-${STAR_VERSION}/source && \\\\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\\\n cp STAR /usr/local/bin && \\\\\n cd / && \\\\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \\\\\n apt-get --purge autoremove -y ${PACKAGES} && \\\\\n apt-get clean\n" + ], + "env" : [ + "STAR_VERSION 2.7.11b", + "PACKAGES gcc g++ make wget zlib1g-dev unzip xxd" + ] + }, + { + "type" : "docker", + "run" : [ + "STAR --version | sed 's#\\\\(.*\\\\)#star: \\"\\\\1\\"#' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/star/star_align_reads/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/star/star_align_reads", + "viash_version" : "0.9.0-RC6", + "git_commit" : "b84b29747d0635f2ac83ea63b496be9a9edb6724", + "git_remote" : "https://github.com/viash-hub/biobox" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.1.0", + "description" : "A collection of bioinformatics tools for working with sequence data.\n", + "viash_version" : "0.9.0-RC6", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.1.0'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import tempfile +import subprocess +import shutil +from pathlib import Path + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'runRNGseed': $( if [ ! -z ${VIASH_PAR_RUNRNGSEED+x} ]; then echo "int(r'${VIASH_PAR_RUNRNGSEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'genomeDir': $( if [ ! -z ${VIASH_PAR_GENOMEDIR+x} ]; then echo "r'${VIASH_PAR_GENOMEDIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'genomeLoad': $( if [ ! -z ${VIASH_PAR_GENOMELOAD+x} ]; then echo "r'${VIASH_PAR_GENOMELOAD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'genomeFastaFiles': $( if [ ! -z ${VIASH_PAR_GENOMEFASTAFILES+x} ]; then echo "r'${VIASH_PAR_GENOMEFASTAFILES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'genomeFileSizes': $( if [ ! -z ${VIASH_PAR_GENOMEFILESIZES+x} ]; then echo "list(map(int, r'${VIASH_PAR_GENOMEFILESIZES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'genomeTransformOutput': $( if [ ! -z ${VIASH_PAR_GENOMETRANSFORMOUTPUT+x} ]; then echo "r'${VIASH_PAR_GENOMETRANSFORMOUTPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'genomeChrSetMitochondrial': $( if [ ! -z ${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL+x} ]; then echo "r'${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbFileChrStartEnd': $( if [ ! -z ${VIASH_PAR_SJDBFILECHRSTARTEND+x} ]; then echo "r'${VIASH_PAR_SJDBFILECHRSTARTEND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbGTFfile': $( if [ ! -z ${VIASH_PAR_SJDBGTFFILE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFchrPrefix': $( if [ ! -z ${VIASH_PAR_SJDBGTFCHRPREFIX+x} ]; then echo "r'${VIASH_PAR_SJDBGTFCHRPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFfeatureExon': $( if [ ! -z ${VIASH_PAR_SJDBGTFFEATUREEXON+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFEATUREEXON//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentTranscript': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentGene': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentGeneName': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbGTFtagExonParentGeneType': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbOverhang': $( if [ ! -z ${VIASH_PAR_SJDBOVERHANG+x} ]; then echo "int(r'${VIASH_PAR_SJDBOVERHANG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'sjdbScore': $( if [ ! -z ${VIASH_PAR_SJDBSCORE+x} ]; then echo "int(r'${VIASH_PAR_SJDBSCORE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'sjdbInsertSave': $( if [ ! -z ${VIASH_PAR_SJDBINSERTSAVE+x} ]; then echo "r'${VIASH_PAR_SJDBINSERTSAVE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'varVCFfile': $( if [ ! -z ${VIASH_PAR_VARVCFFILE+x} ]; then echo "r'${VIASH_PAR_VARVCFFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesType': $( if [ ! -z ${VIASH_PAR_READFILESTYPE+x} ]; then echo "r'${VIASH_PAR_READFILESTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesSAMattrKeep': $( if [ ! -z ${VIASH_PAR_READFILESSAMATTRKEEP+x} ]; then echo "r'${VIASH_PAR_READFILESSAMATTRKEEP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'readFilesManifest': $( if [ ! -z ${VIASH_PAR_READFILESMANIFEST+x} ]; then echo "r'${VIASH_PAR_READFILESMANIFEST//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesPrefix': $( if [ ! -z ${VIASH_PAR_READFILESPREFIX+x} ]; then echo "r'${VIASH_PAR_READFILESPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesCommand': $( if [ ! -z ${VIASH_PAR_READFILESCOMMAND+x} ]; then echo "r'${VIASH_PAR_READFILESCOMMAND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'readMapNumber': $( if [ ! -z ${VIASH_PAR_READMAPNUMBER+x} ]; then echo "int(r'${VIASH_PAR_READMAPNUMBER//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'readMatesLengthsIn': $( if [ ! -z ${VIASH_PAR_READMATESLENGTHSIN+x} ]; then echo "r'${VIASH_PAR_READMATESLENGTHSIN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readNameSeparator': $( if [ ! -z ${VIASH_PAR_READNAMESEPARATOR+x} ]; then echo "r'${VIASH_PAR_READNAMESEPARATOR//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'readQualityScoreBase': $( if [ ! -z ${VIASH_PAR_READQUALITYSCOREBASE+x} ]; then echo "int(r'${VIASH_PAR_READQUALITYSCOREBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'clipAdapterType': $( if [ ! -z ${VIASH_PAR_CLIPADAPTERTYPE+x} ]; then echo "r'${VIASH_PAR_CLIPADAPTERTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'clip3pNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip3pAdapterSeq': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERSEQ+x} ]; then echo "r'${VIASH_PAR_CLIP3PADAPTERSEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'clip3pAdapterMMp': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERMMP+x} ]; then echo "list(map(float, r'${VIASH_PAR_CLIP3PADAPTERMMP//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip3pAfterAdapterNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PAFTERADAPTERNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PAFTERADAPTERNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip5pNbases': $( if [ ! -z ${VIASH_PAR_CLIP5PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP5PNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'limitGenomeGenerateRAM': $( if [ ! -z ${VIASH_PAR_LIMITGENOMEGENERATERAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITGENOMEGENERATERAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitIObufferSize': $( if [ ! -z ${VIASH_PAR_LIMITIOBUFFERSIZE+x} ]; then echo "list(map(int, r'${VIASH_PAR_LIMITIOBUFFERSIZE//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'limitOutSAMoneReadBytes': $( if [ ! -z ${VIASH_PAR_LIMITOUTSAMONEREADBYTES+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSAMONEREADBYTES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitOutSJoneRead': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJONEREAD+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJONEREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitOutSJcollapsed': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJCOLLAPSED+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJCOLLAPSED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitBAMsortRAM': $( if [ ! -z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITBAMSORTRAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitSjdbInsertNsj': $( if [ ! -z ${VIASH_PAR_LIMITSJDBINSERTNSJ+x} ]; then echo "int(r'${VIASH_PAR_LIMITSJDBINSERTNSJ//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitNreadsSoft': $( if [ ! -z ${VIASH_PAR_LIMITNREADSSOFT+x} ]; then echo "int(r'${VIASH_PAR_LIMITNREADSSOFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outTmpKeep': $( if [ ! -z ${VIASH_PAR_OUTTMPKEEP+x} ]; then echo "r'${VIASH_PAR_OUTTMPKEEP//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outStd': $( if [ ! -z ${VIASH_PAR_OUTSTD+x} ]; then echo "r'${VIASH_PAR_OUTSTD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outReadsUnmapped': $( if [ ! -z ${VIASH_PAR_OUTREADSUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTREADSUNMAPPED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outQSconversionAdd': $( if [ ! -z ${VIASH_PAR_OUTQSCONVERSIONADD+x} ]; then echo "int(r'${VIASH_PAR_OUTQSCONVERSIONADD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outMultimapperOrder': $( if [ ! -z ${VIASH_PAR_OUTMULTIMAPPERORDER+x} ]; then echo "r'${VIASH_PAR_OUTMULTIMAPPERORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMtype': $( if [ ! -z ${VIASH_PAR_OUTSAMTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSAMTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMmode': $( if [ ! -z ${VIASH_PAR_OUTSAMMODE+x} ]; then echo "r'${VIASH_PAR_OUTSAMMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMstrandField': $( if [ ! -z ${VIASH_PAR_OUTSAMSTRANDFIELD+x} ]; then echo "r'${VIASH_PAR_OUTSAMSTRANDFIELD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMattributes': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRIBUTES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMattrIHstart': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIHSTART+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMATTRIHSTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMunmapped': $( if [ ! -z ${VIASH_PAR_OUTSAMUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTSAMUNMAPPED//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMorder': $( if [ ! -z ${VIASH_PAR_OUTSAMORDER+x} ]; then echo "r'${VIASH_PAR_OUTSAMORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMprimaryFlag': $( if [ ! -z ${VIASH_PAR_OUTSAMPRIMARYFLAG+x} ]; then echo "r'${VIASH_PAR_OUTSAMPRIMARYFLAG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMreadID': $( if [ ! -z ${VIASH_PAR_OUTSAMREADID+x} ]; then echo "r'${VIASH_PAR_OUTSAMREADID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMmapqUnique': $( if [ ! -z ${VIASH_PAR_OUTSAMMAPQUNIQUE+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMAPQUNIQUE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMflagOR': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGOR+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMflagAND': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGAND+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGAND//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMattrRGline': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRRGLINE+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRRGLINE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderHD': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERHD+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERHD//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderPG': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERPG+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERPG//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderCommentFile': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERCOMMENTFILE+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERCOMMENTFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMfilter': $( if [ ! -z ${VIASH_PAR_OUTSAMFILTER+x} ]; then echo "r'${VIASH_PAR_OUTSAMFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMmultNmax': $( if [ ! -z ${VIASH_PAR_OUTSAMMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMULTNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMtlen': $( if [ ! -z ${VIASH_PAR_OUTSAMTLEN+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMTLEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outBAMcompression': $( if [ ! -z ${VIASH_PAR_OUTBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMCOMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outBAMsortingThreadN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGTHREADN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGTHREADN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outBAMsortingBinsN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGBINSN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGBINSN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'bamRemoveDuplicatesType': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESTYPE+x} ]; then echo "r'${VIASH_PAR_BAMREMOVEDUPLICATESTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'bamRemoveDuplicatesMate2basesN': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN+x} ]; then echo "int(r'${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outWigType': $( if [ ! -z ${VIASH_PAR_OUTWIGTYPE+x} ]; then echo "r'${VIASH_PAR_OUTWIGTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outWigStrand': $( if [ ! -z ${VIASH_PAR_OUTWIGSTRAND+x} ]; then echo "r'${VIASH_PAR_OUTWIGSTRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outWigReferencesPrefix': $( if [ ! -z ${VIASH_PAR_OUTWIGREFERENCESPREFIX+x} ]; then echo "r'${VIASH_PAR_OUTWIGREFERENCESPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outWigNorm': $( if [ ! -z ${VIASH_PAR_OUTWIGNORM+x} ]; then echo "r'${VIASH_PAR_OUTWIGNORM//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outFilterType': $( if [ ! -z ${VIASH_PAR_OUTFILTERTYPE+x} ]; then echo "r'${VIASH_PAR_OUTFILTERTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outFilterMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMultimapNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMismatchNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMISMATCHNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMismatchNoverLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMismatchNoverReadLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterScoreMin': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERSCOREMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterScoreMinOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMatchNmin': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMATCHNMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMatchNminOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterIntronMotifs': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONMOTIFS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONMOTIFS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outFilterIntronStrands': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONSTRANDS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONSTRANDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSJtype': $( if [ ! -z ${VIASH_PAR_OUTSJTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSJTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSJfilterReads': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERREADS+x} ]; then echo "r'${VIASH_PAR_OUTSJFILTERREADS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSJfilterOverhangMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTEROVERHANGMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTEROVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterCountUniqueMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterCountTotalMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterDistToOtherSJmin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterIntronMaxVsReadN': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'scoreGap': $( if [ ! -z ${VIASH_PAR_SCOREGAP+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGapNoncan': $( if [ ! -z ${VIASH_PAR_SCOREGAPNONCAN+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPNONCAN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGapGCAG': $( if [ ! -z ${VIASH_PAR_SCOREGAPGCAG+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPGCAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGapATAC': $( if [ ! -z ${VIASH_PAR_SCOREGAPATAC+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPATAC//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGenomicLengthLog2scale': $( if [ ! -z ${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE+x} ]; then echo "int(r'${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreDelOpen': $( if [ ! -z ${VIASH_PAR_SCOREDELOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELOPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreDelBase': $( if [ ! -z ${VIASH_PAR_SCOREDELBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreInsOpen': $( if [ ! -z ${VIASH_PAR_SCOREINSOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSOPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreInsBase': $( if [ ! -z ${VIASH_PAR_SCOREINSBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreStitchSJshift': $( if [ ! -z ${VIASH_PAR_SCORESTITCHSJSHIFT+x} ]; then echo "int(r'${VIASH_PAR_SCORESTITCHSJSHIFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSearchStartLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHSTARTLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSearchStartLmaxOverLread': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSearchLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedMultimapNmax': $( if [ ! -z ${VIASH_PAR_SEEDMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedPerReadNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedPerWindowNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERWINDOWNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedNoneLociPerWindow': $( if [ ! -z ${VIASH_PAR_SEEDNONELOCIPERWINDOW+x} ]; then echo "int(r'${VIASH_PAR_SEEDNONELOCIPERWINDOW//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSplitMin': $( if [ ! -z ${VIASH_PAR_SEEDSPLITMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDSPLITMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedMapMin': $( if [ ! -z ${VIASH_PAR_SEEDMAPMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDMAPMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignIntronMin': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignIntronMax': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignMatesGapMax': $( if [ ! -z ${VIASH_PAR_ALIGNMATESGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNMATESGAPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSJoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSJstitchMismatchNmax': $( if [ ! -z ${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX+x} ]; then echo "list(map(int, r'${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'alignSJDBoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJDBOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJDBOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSplicedMateMapLmin': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSplicedMateMapLminOverLmate': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE+x} ]; then echo "float(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignWindowsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNWINDOWSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNWINDOWSPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignTranscriptsPerWindowNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignTranscriptsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignEndsType': $( if [ ! -z ${VIASH_PAR_ALIGNENDSTYPE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'alignEndsProtrude': $( if [ ! -z ${VIASH_PAR_ALIGNENDSPROTRUDE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSPROTRUDE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'alignSoftClipAtReferenceEnds': $( if [ ! -z ${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS+x} ]; then echo "r'${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'alignInsertionFlush': $( if [ ! -z ${VIASH_PAR_ALIGNINSERTIONFLUSH+x} ]; then echo "r'${VIASH_PAR_ALIGNINSERTIONFLUSH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'peOverlapNbasesMin': $( if [ ! -z ${VIASH_PAR_PEOVERLAPNBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_PEOVERLAPNBASESMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'peOverlapMMp': $( if [ ! -z ${VIASH_PAR_PEOVERLAPMMP+x} ]; then echo "float(r'${VIASH_PAR_PEOVERLAPMMP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winAnchorMultimapNmax': $( if [ ! -z ${VIASH_PAR_WINANCHORMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winBinNbits': $( if [ ! -z ${VIASH_PAR_WINBINNBITS+x} ]; then echo "int(r'${VIASH_PAR_WINBINNBITS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winAnchorDistNbins': $( if [ ! -z ${VIASH_PAR_WINANCHORDISTNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORDISTNBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winFlankNbins': $( if [ ! -z ${VIASH_PAR_WINFLANKNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINFLANKNBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winReadCoverageRelativeMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGERELATIVEMIN+x} ]; then echo "float(r'${VIASH_PAR_WINREADCOVERAGERELATIVEMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winReadCoverageBasesMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGEBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_WINREADCOVERAGEBASESMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimOutType': $( if [ ! -z ${VIASH_PAR_CHIMOUTTYPE+x} ]; then echo "r'${VIASH_PAR_CHIMOUTTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'chimSegmentMin': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreMin': $( if [ ! -z ${VIASH_PAR_CHIMSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreDropMax': $( if [ ! -z ${VIASH_PAR_CHIMSCOREDROPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREDROPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreSeparation': $( if [ ! -z ${VIASH_PAR_CHIMSCORESEPARATION+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCORESEPARATION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreJunctionNonGTAG': $( if [ ! -z ${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimJunctionOverhangMin': $( if [ ! -z ${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimSegmentReadGapMax': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTREADGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTREADGAPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimFilter': $( if [ ! -z ${VIASH_PAR_CHIMFILTER+x} ]; then echo "r'${VIASH_PAR_CHIMFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'chimMainSegmentMultNmax': $( if [ ! -z ${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimMultimapNmax': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPSCORERANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimNonchimScoreDropMin': $( if [ ! -z ${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimOutJunctionFormat': $( if [ ! -z ${VIASH_PAR_CHIMOUTJUNCTIONFORMAT+x} ]; then echo "int(r'${VIASH_PAR_CHIMOUTJUNCTIONFORMAT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'quantMode': $( if [ ! -z ${VIASH_PAR_QUANTMODE+x} ]; then echo "r'${VIASH_PAR_QUANTMODE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'quantTranscriptomeBAMcompression': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'quantTranscriptomeSAMoutput': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMESAMOUTPUT+x} ]; then echo "r'${VIASH_PAR_QUANTTRANSCRIPTOMESAMOUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'twopassMode': $( if [ ! -z ${VIASH_PAR_TWOPASSMODE+x} ]; then echo "r'${VIASH_PAR_TWOPASSMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'twopass1readsN': $( if [ ! -z ${VIASH_PAR_TWOPASS1READSN+x} ]; then echo "int(r'${VIASH_PAR_TWOPASS1READSN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'waspOutputMode': $( if [ ! -z ${VIASH_PAR_WASPOUTPUTMODE+x} ]; then echo "r'${VIASH_PAR_WASPOUTPUTMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'input_r2': $( if [ ! -z ${VIASH_PAR_INPUT_R2+x} ]; then echo "r'${VIASH_PAR_INPUT_R2//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'aligned_reads': $( if [ ! -z ${VIASH_PAR_ALIGNED_READS+x} ]; then echo "r'${VIASH_PAR_ALIGNED_READS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reads_per_gene': $( if [ ! -z ${VIASH_PAR_READS_PER_GENE+x} ]; then echo "r'${VIASH_PAR_READS_PER_GENE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'unmapped': $( if [ ! -z ${VIASH_PAR_UNMAPPED+x} ]; then echo "r'${VIASH_PAR_UNMAPPED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'unmapped_r2': $( if [ ! -z ${VIASH_PAR_UNMAPPED_R2+x} ]; then echo "r'${VIASH_PAR_UNMAPPED_R2//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'chimeric_junctions': $( if [ ! -z ${VIASH_PAR_CHIMERIC_JUNCTIONS+x} ]; then echo "r'${VIASH_PAR_CHIMERIC_JUNCTIONS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'log': $( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "r'${VIASH_PAR_LOG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'splice_junctions': $( if [ ! -z ${VIASH_PAR_SPLICE_JUNCTIONS+x} ]; then echo "r'${VIASH_PAR_SPLICE_JUNCTIONS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kib': $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mib': $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pib': $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +dep = { + +} + +## VIASH END + +################################################## +# check and process SE / PE R1 input files +input_r1 = par["input"] +readFilesIn = ",".join(par["input"]) +par["input"] = None + +# check and process PE R2 input files +input_r2 = par["input_r2"] +if input_r2 is not None: + if len(input_r1) != len(input_r2): + raise ValueError("The number of R1 and R2 files do not match.") + readFilesIn = [readFilesIn, ",".join(par["input_r2"])] + par["input_r2"] = None + +# store readFilesIn +par["readFilesIn"] = readFilesIn + +################################################## + +# determine readFilesCommand +if input_r1[0].endswith(".gz"): + print(">> Input files are gzipped, setting readFilesCommand to zcat", flush=True) + par["readFilesCommand"] = "zcat" +elif input_r1[0].endswith(".bz2"): + print(">> Input files are bzipped, setting readFilesCommand to bzcat", flush=True) + par["readFilesCommand"] = "bzcat" + +################################################## +# store output paths +expected_outputs = { + "aligned_reads": ["Aligned.out.sam", "Aligned.out.bam"], + "reads_per_gene": "ReadsPerGene.out.tab", + "chimeric_junctions": "Chimeric.out.junction", + "log": "Log.final.out", + "splice_junctions": "SJ.out.tab", + "unmapped": "Unmapped.out.mate1", + "unmapped_r2": "Unmapped.out.mate2" +} +output_paths = {name: par[name] for name in expected_outputs.keys()} +for name in expected_outputs.keys(): + par[name] = None + +################################################## +# process other args +par["runMode"] = "alignReads" + +if "cpus" in meta and meta["cpus"]: + par["runThreadN"] = meta["cpus"] + +################################################## +# run STAR and move output to final destination +with tempfile.TemporaryDirectory(prefix="star-", dir=meta["temp_dir"], ignore_cleanup_errors=True) as temp_dir: + print(">> Constructing command", flush=True) + + # set output paths + temp_dir = Path(temp_dir) + par["outTmpDir"] = temp_dir / "tempdir" + out_dir = temp_dir / "out" + par["outFileNamePrefix"] = f"{out_dir}/" # star needs this slash + + # construct command + cmd_args = [ "STAR" ] + for name, value in par.items(): + if value is not None: + val_to_add = value if isinstance(value, list) else [value] + cmd_args.extend([f"--{name}"] + [str(x) for x in val_to_add]) + print("", flush=True) + + # run command + print(">> Running STAR with command:", flush=True) + print(f"+ {' '.join(cmd_args)}", end="\\\\n\\\\n", flush=True) + subprocess.run( + cmd_args, + check=True + ) + print(">> STAR finished successfully", end="\\\\n\\\\n", flush=True) + + # move output to final destination + print(">> Moving output to final destination", flush=True) + for name, paths in expected_outputs.items(): + for expected_path in [paths] if isinstance(paths, str) else paths: + expected_full_path = out_dir / expected_path + if output_paths[name] and expected_full_path.is_file(): + print(f">> Moving {expected_path} to {output_paths[name]}", flush=True) + shutil.move(expected_full_path, output_paths[name]) +VIASHMAIN +python -B "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def viash_par_contents = "(viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName})" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = new nextflow.script.ScriptParser(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/star/star_align_reads", + "tag" : "v0.1.0" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/star/star_align_reads/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/star/star_align_reads/nextflow.config new file mode 100644 index 0000000..54b1a38 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/star/star_align_reads/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'star/star_align_reads' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.1.0' + description = 'Aligns reads to a reference genome using STAR.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/star/star_align_reads/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/star/star_align_reads/nextflow_schema.json new file mode 100644 index 0000000..6c1dc14 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.1/nextflow/star/star_align_reads/nextflow_schema.json @@ -0,0 +1,171 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "star_align_reads", +"description": "Aligns reads to a reference genome using STAR.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz`, multiple_sep: `\":\"`. The single-end or paired-end R1 FastQ files to be processed", + "help_text": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz`, multiple_sep: `\":\"`. The single-end or paired-end R1 FastQ files to be processed." + + } + + + , + "input_r2": { + "type": + "string", + "description": "Type: List of `file`, example: `mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\":\"`. The paired-end R2 FastQ files to be processed", + "help_text": "Type: List of `file`, example: `mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\":\"`. The paired-end R2 FastQ files to be processed. Only required if --input is a paired-end R1 file." + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "aligned_reads": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.aligned_reads.bam`, example: `aligned_reads.bam`. The output file containing the aligned reads", + "help_text": "Type: `file`, required, default: `$id.$key.aligned_reads.bam`, example: `aligned_reads.bam`. The output file containing the aligned reads." + , + "default": "$id.$key.aligned_reads.bam" + } + + + , + "reads_per_gene": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.reads_per_gene.tsv`, example: `reads_per_gene.tsv`. The output file containing the number of reads per gene", + "help_text": "Type: `file`, default: `$id.$key.reads_per_gene.tsv`, example: `reads_per_gene.tsv`. The output file containing the number of reads per gene." + , + "default": "$id.$key.reads_per_gene.tsv" + } + + + , + "unmapped": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.unmapped.fastq`, example: `unmapped.fastq`. The output file containing the unmapped reads", + "help_text": "Type: `file`, default: `$id.$key.unmapped.fastq`, example: `unmapped.fastq`. The output file containing the unmapped reads." + , + "default": "$id.$key.unmapped.fastq" + } + + + , + "unmapped_r2": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.unmapped_r2.fastq`, example: `unmapped_r2.fastq`. The output file containing the unmapped R2 reads", + "help_text": "Type: `file`, default: `$id.$key.unmapped_r2.fastq`, example: `unmapped_r2.fastq`. The output file containing the unmapped R2 reads." + , + "default": "$id.$key.unmapped_r2.fastq" + } + + + , + "chimeric_junctions": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.chimeric_junctions.tsv`, example: `chimeric_junctions.tsv`. The output file containing the chimeric junctions", + "help_text": "Type: `file`, default: `$id.$key.chimeric_junctions.tsv`, example: `chimeric_junctions.tsv`. The output file containing the chimeric junctions." + , + "default": "$id.$key.chimeric_junctions.tsv" + } + + + , + "log": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.log.txt`, example: `log.txt`. The output file containing the log of the alignment process", + "help_text": "Type: `file`, default: `$id.$key.log.txt`, example: `log.txt`. The output file containing the log of the alignment process." + , + "default": "$id.$key.log.txt" + } + + + , + "splice_junctions": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.splice_junctions.tsv`, example: `splice_junctions.tsv`. The output file containing the splice junctions", + "help_text": "Type: `file`, default: `$id.$key.splice_junctions.tsv`, example: `splice_junctions.tsv`. The output file containing the splice junctions." + , + "default": "$id.$key.splice_junctions.tsv" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/mapping_and_qc/.config.vsh.yaml b/target/nextflow/mapping_and_qc/.config.vsh.yaml new file mode 100644 index 0000000..9be9983 --- /dev/null +++ b/target/nextflow/mapping_and_qc/.config.vsh.yaml @@ -0,0 +1,204 @@ +name: "mapping_and_qc" +version: "main" +argument_groups: +- name: "Input arguments" + arguments: + - type: "file" + name: "--input_r1" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_r2" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--reference" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output arguments" + arguments: + - type: "file" + name: "--multiqc_output" + info: null + example: + - "multiqc.html" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +description: "Run STAR and QC" +info: null +status: "enabled" +requirements: + commands: + - "ps" +dependencies: +- name: "cutadapt" + repository: + type: "vsh" + repo: "vsh/biobox" + tag: "v0.1" +- name: "pear" + repository: + type: "vsh" + repo: "vsh/biobox" + tag: "v0.1" +- name: "falco" + repository: + type: "vsh" + repo: "vsh/biobox" + tag: "v0.1" +- name: "multiqc" + repository: + type: "vsh" + repo: "vsh/biobox" + tag: "v0.1" +- name: "star/star_align_reads" + repository: + type: "vsh" + repo: "vsh/biobox" + tag: "v0.1" +- name: "samtools/samtools_stats" + repository: + type: "vsh" + repo: "vsh/biobox" + tag: "v0.1" +repositories: +- type: "vsh" + name: "bb" + repo: "vsh/biobox" + tag: "v0.1" +license: "MIT" +links: + repository: "https://github.com/viash-hub/playground" +runners: +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +- type: "native" + id: "native" +build_info: + config: "src/mapping_and_qc/config.vsh.yaml" + runner: "nextflow" + engine: "native|native" + output: "target/nextflow/mapping_and_qc" + executable: "target/nextflow/mapping_and_qc/main.nf" + viash_version: "0.9.0-RC6" + git_commit: "ec3c23e349a796e57e6634e140325afde4515bbf" + git_remote: "https://github.com/viash-hub/playground" + dependencies: + - "target/dependencies/vsh/vsh/biobox/v0.1/nextflow/cutadapt" + - "target/dependencies/vsh/vsh/biobox/v0.1/nextflow/pear" + - "target/dependencies/vsh/vsh/biobox/v0.1/nextflow/falco" + - "target/dependencies/vsh/vsh/biobox/v0.1/nextflow/multiqc" + - "target/dependencies/vsh/vsh/biobox/v0.1/nextflow/star/star_align_reads" + - "target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats" +package_config: + name: "playground" + version: "main" + description: "A collection of bioinformatics pipelines to illustrate the use of\ + \ biobox (and biotools).\n" + info: null + viash_version: "0.9.0-RC6" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "sequence" + - "high-throughput" + - "mapping" + - "counting" + - "pipeline" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/playground" + issue_tracker: "https://github.com/viash-hub/playground/issues" diff --git a/target/nextflow/mapping_and_qc/main.nf b/target/nextflow/mapping_and_qc/main.nf new file mode 100644 index 0000000..6072477 --- /dev/null +++ b/target/nextflow/mapping_and_qc/main.nf @@ -0,0 +1,3268 @@ +// mapping_and_qc main +// +// This wrapper script is auto-generated by viash 0.9.0-RC6 and is thus a +// derivative work thereof. This software comes with ABSOLUTELY NO WARRANTY from +// Data Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value instanceof String) { + try { + value = value.toInteger() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigInteger) { + value = value.intValue() + } + expectedClass = value instanceof Integer ? null : "Integer" + } else if (par.type == "long") { + // cast to long if need be + if (value instanceof String) { + try { + value = value.toLong() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof Integer) { + value = value.toLong() + } + expectedClass = value instanceof Long ? null : "Long" + } else if (par.type == "double") { + // cast to double if need be + if (value instanceof String) { + try { + value = value.toDouble() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigDecimal) { + value = value.doubleValue() + } + if (value instanceof Float) { + value = value.toDouble() + } + expectedClass = value instanceof Double ? null : "Double" + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value instanceof String) { + def valueLower = value.toLowerCase() + if (valueLower == "true") { + value = true + } else if (valueLower == "false") { + value = false + } + } + expectedClass = value instanceof Boolean ? null : "Boolean" + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required) { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _processOutputValues(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(";") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey,newKey:oldKey'" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{[yamlFile] + outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ +mkdir -p "\$(dirname '${yamlFile}')" +echo "Storing state as yaml" +echo '${yamlBlob}' > '${yamlFile}' +echo "Copying output files to destination folder" +${copyCommands.join("\n ")} +""" +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (key, value) are the tuples that will be saved to the state.yaml file + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$key', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = val instanceof File ? val.toPath() : val + [value: value_, inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + def chModifiedFiltered = workflowArgs.filter ? + chModified | filter{workflowArgs.filter(it)} : + chModified + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModifiedFiltered.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModifiedFiltered + chPassthrough = Channel.empty() + } + + def chArgs = workflowArgs.fromState ? + chRun | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRun | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutput = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + // check output tuple + | map { id_, output_ -> + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _processOutputValues(output_, meta.config, id_, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { + output_ = output_.values()[0] + } + + [join_id, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chInitialOutput, chModifiedFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublish = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublish, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + + // remove join_id and meta + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "mapping_and_qc", + "version" : "main", + "argument_groups" : [ + { + "name" : "Input arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--input_r1", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input_r2", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--reference", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--multiqc_output", + "example" : [ + "multiqc.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + } + ], + "description" : "Run STAR and QC", + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "dependencies" : [ + { + "name" : "cutadapt", + "repository" : { + "type" : "vsh", + "repo" : "vsh/biobox", + "tag" : "v0.1" + } + }, + { + "name" : "pear", + "repository" : { + "type" : "vsh", + "repo" : "vsh/biobox", + "tag" : "v0.1" + } + }, + { + "name" : "falco", + "repository" : { + "type" : "vsh", + "repo" : "vsh/biobox", + "tag" : "v0.1" + } + }, + { + "name" : "multiqc", + "repository" : { + "type" : "vsh", + "repo" : "vsh/biobox", + "tag" : "v0.1" + } + }, + { + "name" : "star/star_align_reads", + "repository" : { + "type" : "vsh", + "repo" : "vsh/biobox", + "tag" : "v0.1" + } + }, + { + "name" : "samtools/samtools_stats", + "repository" : { + "type" : "vsh", + "repo" : "vsh/biobox", + "tag" : "v0.1" + } + } + ], + "repositories" : [ + { + "type" : "vsh", + "name" : "bb", + "repo" : "vsh/biobox", + "tag" : "v0.1" + } + ], + "license" : "MIT", + "links" : { + "repository" : "https://github.com/viash-hub/playground" + }, + "runners" : [ + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/mapping_and_qc/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native|native", + "output" : "target/nextflow/mapping_and_qc", + "viash_version" : "0.9.0-RC6", + "git_commit" : "ec3c23e349a796e57e6634e140325afde4515bbf", + "git_remote" : "https://github.com/viash-hub/playground" + }, + "package_config" : { + "name" : "playground", + "version" : "main", + "description" : "A collection of bioinformatics pipelines to illustrate the use of biobox (and biotools).\n", + "viash_version" : "0.9.0-RC6", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'main'" + ], + "keywords" : [ + "bioinformatics", + "sequence", + "high-throughput", + "mapping", + "counting", + "pipeline" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/playground", + "issue_tracker" : "https://github.com/viash-hub/playground/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) +meta["root_dir"] = getRootDir() +include { cutadapt } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.1/nextflow/cutadapt/main.nf" +include { pear } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.1/nextflow/pear/main.nf" +include { falco } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.1/nextflow/falco/main.nf" +include { multiqc } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.1/nextflow/multiqc/main.nf" +include { star_align_reads } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.1/nextflow/star/star_align_reads/main.nf" +include { samtools_stats } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats/main.nf" + +// inner workflow +// user-provided Nextflow code +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + | falco.run( + fromState: {id, state -> + def input_list = [state.input_r1, state.input_r2] + [ + "input": input_list, + "outdir": "\$id.falco", + "report_filename": null, + "data_filename": null, + "summary_filename": null, + ] + }, + toState: [ + "output_falco": "outdir", + ] + ) + | cutadapt.run( + fromState: {id, state -> + [ + "input": state.input_r1, + "input_r2": state.input_r2, + "quality_cutoff": "20", // Could make this a parameter + "adapter": "CTGTCTCTTATACACATCT", // Could make this a parameter + "adapter_r2": "CTGTCTCTTATACACATCT", // Could make this a parameter + "output": "*.fastq", + ] + }, + toState: {id, output_state, state -> + def newKeys = [ + "trimmed_r1": output_state["output"][0], + "trimmed_r2": output_state["output"][1], + "output_cutadapt": output_state["output"] + ] + def new_state = state + newKeys + return new_state + } + ) + | pear.run( + fromState: [ + "forward_fastq": "trimmed_r1", + "reverse_fastq": "trimmed_r2", + ], + toState: [ + "output_pear": "assembled", + ] + ) + | star_align_reads.run( + fromState: [ + "input": "output_pear", + "genomeDir": "reference", + ], + toState: [ + "output_star": "aligned_reads", + ] + ) + | samtools_stats.run( + fromState: [ + "input": "output_star", + ], + toState: [ + "output_samtools_stats": "output", + ] + ) + | toSortedList() + | map { events -> + def new_id = "multiqc" + def join_id = events[0][0] + def bam_samtools_stats_dirs = events.collect{it[1].output_samtools_stats.getParent()} + def falco_dirs = events.collect{it[1].output_falco} + return [new_id, ["input": bam_samtools_stats_dirs + falco_dirs, "_meta": ["join_id": join_id]]] + } + | multiqc.run( + fromState: [ + "input": "input", + "output_report": "multiqc_output", + ], + toState: [ + "multiqc_output": "output_report", + ] + ) + | setState(["multiqc_output", "_meta"]) + + emit: + output_ch +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/mapping_and_qc/nextflow.config b/target/nextflow/mapping_and_qc/nextflow.config new file mode 100644 index 0000000..13dccfb --- /dev/null +++ b/target/nextflow/mapping_and_qc/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'mapping_and_qc' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'main' + description = 'Run STAR and QC' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/mapping_and_qc/nextflow_schema.json b/target/nextflow/mapping_and_qc/nextflow_schema.json new file mode 100644 index 0000000..cb9a4bd --- /dev/null +++ b/target/nextflow/mapping_and_qc/nextflow_schema.json @@ -0,0 +1,115 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "mapping_and_qc", +"description": "Run STAR and QC", +"type": "object", +"definitions": { + + + + "input arguments" : { + "title": "Input arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input_r1": { + "type": + "string", + "description": "Type: `file`, required. ", + "help_text": "Type: `file`, required. " + + } + + + , + "input_r2": { + "type": + "string", + "description": "Type: `file`, required. ", + "help_text": "Type: `file`, required. " + + } + + + , + "reference": { + "type": + "string", + "description": "Type: `file`, required. ", + "help_text": "Type: `file`, required. " + + } + + +} +}, + + + "output arguments" : { + "title": "Output arguments", + "type": "object", + "description": "No description", + "properties": { + + + "multiqc_output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.multiqc_output.html`, example: `multiqc.html`. ", + "help_text": "Type: `file`, required, default: `$id.$key.multiqc_output.html`, example: `multiqc.html`. " + , + "default": "$id.$key.multiqc_output.html" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input arguments" + }, + + { + "$ref": "#/definitions/output arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/test_data.sh b/test_data.sh new file mode 100755 index 0000000..581c3fe --- /dev/null +++ b/test_data.sh @@ -0,0 +1,69 @@ +#/usr/bin/env bash + +set -eo pipefail + +# ensure that the command below is run from the root of the repository +REPO_ROOT=$(git rev-parse --show-toplevel) +cd "$REPO_ROOT" + +TEST_DATA_DIR="test_data" +mkdir -p "$TEST_DATA_DIR" +if [ ! -f "$TEST_DATA_DIR/SRR1569895_1.fastq" ] || [ ! -f "$TEST_DATA_DIR/SRR1569895_2.fastq" ]; then + docker run -t --rm -v $PWD:/output:rw -w /output/test_data ncbi/sra-tools fasterq-dump -e 2 -p SRR1569895 +fi +if [ ! -f "$TEST_DATA_DIR/SRR1570800_1.fastq" ] || [ ! -f "$TEST_DATA_DIR/SRR1570800_2.fastq" ]; then + docker run -t --rm -v $PWD:/output:rw -w /output/test_data ncbi/sra-tools fasterq-dump -e 2 -p SRR1570800 +fi + +export NXF_SCM_FILE="$TEST_DATA_DIR/scm.config" + +cat > $NXF_SCM_FILE << EOF +providers { + vsh { + platform = 'gitlab' + server = 'https://viash-hub.com/' + } +} +EOF + +if [ ! -f "$TEST_DATA_DIR/S288C_reference_genome_Current_Release.tgz" ]; then + wget http://sgd-archive.yeastgenome.org/sequence/S288C_reference/genome_releases/S288C_reference_genome_Current_Release.tgz \ + -O "$TEST_DATA_DIR/S288C_reference_genome_Current_Release.tgz" +fi + +if [ ! -d "$TEST_DATA_DIR/S288C_reference_genome_Current_Release" ]; then + nextflow run vsh/craftbox -hub vsh -r main -main-script target/nextflow/untar/main.nf \ + -profile docker \ + --input "$TEST_DATA_DIR/S288C_reference_genome_Current_Release.tgz" \ + --output "S288C_reference_genome_Current_Release" \ + --publish_dir "$TEST_DATA_DIR" +fi + +zcat "$TEST_DATA_DIR/S288C_reference_genome_Current_Release/S288C_reference_sequence_R64-5-1_20240529.fsa.gz" > "$TEST_DATA_DIR/S288C_reference_genome_Current_Release/S288C_reference_sequence_R64-5-1_20240529.fsa" +zcat "$TEST_DATA_DIR/S288C_reference_genome_Current_Release/saccharomyces_cerevisiae_R64-5-1_20240529.gff.gz" > "$TEST_DATA_DIR/S288C_reference_genome_Current_Release/saccharomyces_cerevisiae_R64-5-1_20240529.gff" +sed -i -e 's/^.*chromosome=\(.*\)\]$/>chr\1/' "$TEST_DATA_DIR/S288C_reference_genome_Current_Release/S288C_reference_sequence_R64-5-1_20240529.fsa" + +if [ ! -d "$TEST_DATA_DIR/S288C_reference_genome_Current_Release_STAR" ]; then + nextflow run vsh/biobox -hub vsh -r main -main-script target/nextflow/star/star_genome_generate/main.nf \ + -profile docker \ + --genomeFastaFiles "$TEST_DATA_DIR/S288C_reference_genome_Current_Release/S288C_reference_sequence_R64-5-1_20240529.fsa" \ + --sjdbGTFfile "$TEST_DATA_DIR/S288C_reference_genome_Current_Release/saccharomyces_cerevisiae_R64-5-1_20240529.gff" \ + --sjdbGTFtagExonParentTranscript Parent \ + --sjdbOverhang 100 \ + --publish_dir "$TEST_DATA_DIR" \ + --sjdbGTFfeatureExon noncoding_exon \ + --index S288C_reference_genome_Current_Release_STAR +fi + +PARAMS_FILE=params_file.yaml +cat > $PARAMS_FILE << EOF +param_list: + - id: SRR1569895 + input_r1: $TEST_DATA_DIR/SRR1569895_1.fastq + input_r2: $TEST_DATA_DIR/SRR1569895_2.fastq + - id: SRR1570800 + input_r1: $TEST_DATA_DIR/SRR1570800_1.fastq + input_r2: $TEST_DATA_DIR/SRR1570800_2.fastq +publish_dir: foo +reference: $TEST_DATA_DIR/S288C_reference_genome_Current_Release_STAR +EOF