diff --git a/CHANGELOG.md b/CHANGELOG.md index 717ee40c..5821ece2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# biobox 0.3.1 + +## NEW FUNCTIONALITY + +* `bcl_convert`: add `force` argument (PR #171). + # biobox 0.3.0 ## NEW FUNCTIONALITY @@ -49,6 +55,8 @@ * `umi_tools/umi_tools_extract`: Remove `umi_discard_reads` option and change `log2stderr` to input argument (PR #162). +* `star/star_genome_generate`: Fix passing of optional sjdb parameters (PR #170). + ## MINOR CHANGES * `agat_convert_bed2gff`: change type of argument `inflate_off` from `boolean_false` to `boolean_true` (PR #160). diff --git a/_viash.yaml b/_viash.yaml index d08f2fb2..2fec1f37 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -7,7 +7,7 @@ links: issue_tracker: https://github.com/viash-hub/biobox/issues repository: https://github.com/viash-hub/biobox -viash_version: 0.9.0 +viash_version: 0.9.2 config_mods: | .requirements.commands := ['ps'] diff --git a/src/agat/agat_convert_bed2gff/config.vsh.yaml b/src/agat/agat_convert_bed2gff/config.vsh.yaml index 4466b5f1..e091d78f 100644 --- a/src/agat/agat_convert_bed2gff/config.vsh.yaml +++ b/src/agat/agat_convert_bed2gff/config.vsh.yaml @@ -11,6 +11,8 @@ links: references: doi: 10.5281/zenodo.3552717 license: GPL-3.0 +requirements: + commands: ["agat_convert_bed2gff.pl"] authors: - __merge__: /src/_authors/leila_paquay.yaml roles: [ author, maintainer ] diff --git a/src/agat/agat_convert_embl2gff/config.vsh.yaml b/src/agat/agat_convert_embl2gff/config.vsh.yaml index 99ceec46..19d8c194 100644 --- a/src/agat/agat_convert_embl2gff/config.vsh.yaml +++ b/src/agat/agat_convert_embl2gff/config.vsh.yaml @@ -11,10 +11,11 @@ links: references: doi: 10.5281/zenodo.3552717 license: GPL-3.0 +requirements: + commands: ["agat_convert_embl2gff.pl"] authors: - __merge__: /src/_authors/leila_paquay.yaml roles: [ author, maintainer ] - argument_groups: - name: Inputs arguments: diff --git a/src/agat/agat_convert_genscan2gff/config.vsh.yaml b/src/agat/agat_convert_genscan2gff/config.vsh.yaml index 2adce1da..7f54536a 100644 --- a/src/agat/agat_convert_genscan2gff/config.vsh.yaml +++ b/src/agat/agat_convert_genscan2gff/config.vsh.yaml @@ -22,11 +22,10 @@ references: doi: 10.5281/zenodo.3552717 license: GPL-3.0 requirements: - - commands: [agat] + commands: ["agat_convert_genscan2gff.pl"] authors: - __merge__: /src/_authors/leila_paquay.yaml roles: [ author, maintainer ] - argument_groups: - name: Inputs arguments: diff --git a/src/agat/agat_convert_mfannot2gff/config.vsh.yaml b/src/agat/agat_convert_mfannot2gff/config.vsh.yaml index 625c4613..b34c942a 100644 --- a/src/agat/agat_convert_mfannot2gff/config.vsh.yaml +++ b/src/agat/agat_convert_mfannot2gff/config.vsh.yaml @@ -12,9 +12,9 @@ links: repository: https://github.com/NBISweden/AGAT references: doi: 10.5281/zenodo.3552717 -license: GPL-3. +license: GPL-3.0 requirements: - - command: [agat] + commands: ["agat_convert_mfannot2gff.pl"] authors: - __merge__: /src/_authors/leila_paquay.yaml roles: [ author, maintainer ] diff --git a/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml b/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml index 757cbd85..7a3c5be5 100644 --- a/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml +++ b/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml @@ -27,10 +27,11 @@ links: references: doi: 10.5281/zenodo.3552717 license: GPL-3.0 +requirements: + commands: ["agat_convert_sp_gff2gtf.pl"] authors: - __merge__: /src/_authors/leila_paquay.yaml roles: [ author, maintainer ] - argument_groups: - name: Inputs arguments: diff --git a/src/agat/agat_convert_sp_gff2tsv/config.vsh.yaml b/src/agat/agat_convert_sp_gff2tsv/config.vsh.yaml index f1c78590..cffdea3a 100644 --- a/src/agat/agat_convert_sp_gff2tsv/config.vsh.yaml +++ b/src/agat/agat_convert_sp_gff2tsv/config.vsh.yaml @@ -12,10 +12,11 @@ links: references: doi: 10.5281/zenodo.3552717 license: GPL-3.0 +requirements: + commands: ["agat_convert_sp_gff2tsv.pl"] authors: - __merge__: /src/_authors/leila_paquay.yaml roles: [ author, maintainer ] - argument_groups: - name: Inputs arguments: diff --git a/src/agat/agat_convert_sp_gxf2gxf/config.vsh.yaml b/src/agat/agat_convert_sp_gxf2gxf/config.vsh.yaml index 9e77b09d..4ad05673 100644 --- a/src/agat/agat_convert_sp_gxf2gxf/config.vsh.yaml +++ b/src/agat/agat_convert_sp_gxf2gxf/config.vsh.yaml @@ -21,10 +21,11 @@ links: references: doi: 10.5281/zenodo.3552717 license: GPL-3.0 +requirements: + commands: ["agat_convert_sp_gxf2gxf.pl"] authors: - __merge__: /src/_authors/leila_paquay.yaml roles: [ author, maintainer ] - argument_groups: - name: Inputs arguments: diff --git a/src/agat/agat_sp_add_introns/config.vsh.yaml b/src/agat/agat_sp_add_introns/config.vsh.yaml index 06ec8474..3916c350 100644 --- a/src/agat/agat_sp_add_introns/config.vsh.yaml +++ b/src/agat/agat_sp_add_introns/config.vsh.yaml @@ -12,11 +12,10 @@ references: doi: 10.5281/zenodo.3552717 license: GPL-3.0 requirements: - commands: [agat] + commands: ["agat_sp_add_introns.pl"] authors: - __merge__: /src/_authors/leila_paquay.yaml roles: [ author, maintainer ] - argument_groups: - name: Inputs arguments: diff --git a/src/agat/agat_sp_filter_feature_from_kill_list/config.vsh.yaml b/src/agat/agat_sp_filter_feature_from_kill_list/config.vsh.yaml index 0608ad4d..80857e23 100644 --- a/src/agat/agat_sp_filter_feature_from_kill_list/config.vsh.yaml +++ b/src/agat/agat_sp_filter_feature_from_kill_list/config.vsh.yaml @@ -15,11 +15,10 @@ references: doi: 10.5281/zenodo.3552717 license: GPL-3.0 requirements: - - commands: [agat] + commands: ["agat_sp_filter_feature_from_kill_list.pl"] authors: - __merge__: /src/_authors/leila_paquay.yaml roles: [ author, maintainer ] - argument_groups: - name: Inputs arguments: diff --git a/src/agat/agat_sp_merge_annotations/config.vsh.yaml b/src/agat/agat_sp_merge_annotations/config.vsh.yaml index bc47921a..ba87f446 100644 --- a/src/agat/agat_sp_merge_annotations/config.vsh.yaml +++ b/src/agat/agat_sp_merge_annotations/config.vsh.yaml @@ -13,7 +13,7 @@ references: doi: 10.5281/zenodo.3552717 license: GPL-3.0 requirements: - commands: [agat] + commands: ["agat_sp_merge_annotations.pl"] authors: - __merge__: /src/_authors/leila_paquay.yaml roles: [ author, maintainer ] diff --git a/src/agat/agat_sp_statistics/config.vsh.yaml b/src/agat/agat_sp_statistics/config.vsh.yaml index 6890bb84..930ff156 100644 --- a/src/agat/agat_sp_statistics/config.vsh.yaml +++ b/src/agat/agat_sp_statistics/config.vsh.yaml @@ -19,11 +19,10 @@ references: doi: 10.5281/zenodo.3552717 license: GPL-3.0 requirements: - - commands: [agat] + commands: ["agat_sp_statistics.pl"] authors: - __merge__: /src/_authors/leila_paquay.yaml roles: [ author, maintainer ] - argument_groups: - name: Inputs arguments: diff --git a/src/agat/agat_sq_stat_basic/config.vsh.yaml b/src/agat/agat_sq_stat_basic/config.vsh.yaml index 64958991..a0f05830 100644 --- a/src/agat/agat_sq_stat_basic/config.vsh.yaml +++ b/src/agat/agat_sq_stat_basic/config.vsh.yaml @@ -12,7 +12,7 @@ references: doi: 10.5281/zenodo.3552717 license: GPL-3.0 requirements: - - commands: [agat] + commands: ["agat_sq_stat_basic.pl"] authors: - __merge__: /src/_authors/leila_paquay.yaml roles: [ author, maintainer ] diff --git a/src/bcl_convert/config.vsh.yaml b/src/bcl_convert/config.vsh.yaml index 81103776..a2c584d7 100644 --- a/src/bcl_convert/config.vsh.yaml +++ b/src/bcl_convert/config.vsh.yaml @@ -134,6 +134,13 @@ argument_groups: required: false description: Reports directory example: logs_dir + - name: "--force" + description: | + Allow destination directory to already exist and overwrite files. + type: boolean + required: false + example: true + # bcl-convert arguments not taken into account # --force diff --git a/src/bcl_convert/script.sh b/src/bcl_convert/script.sh index 6a59acd5..09873062 100644 --- a/src/bcl_convert/script.sh +++ b/src/bcl_convert/script.sh @@ -2,9 +2,13 @@ set -eo pipefail +[[ "$par_force" == "false" ]] && unset par_force + + $(which bcl-convert) \ --bcl-input-directory "$par_bcl_input_directory" \ --output-directory "$par_output_directory" \ + ${par_force:+--force} \ ${par_sample_sheet:+ --sample-sheet "$par_sample_sheet"} \ ${par_run_info:+ --run-info "$par_run_info"} \ ${par_bcl_only_lane:+ --bcl-only-lane "$par_bcl_only_lane"} \ diff --git a/src/star/star_genome_generate/script.sh b/src/star/star_genome_generate/script.sh index fc232672..02f5e9bb 100644 --- a/src/star/star_genome_generate/script.sh +++ b/src/star/star_genome_generate/script.sh @@ -17,10 +17,10 @@ STAR \ ${par_genome_sa_index_nbases:+--genomeSAindexNbases "${par_genome_sa_index_nbases}"} \ ${par_sjdb_gtf_chr_prefix:+--sjdbGTFchrPrefix "${par_sjdb_gtf_chr_prefix}"} \ ${par_sjdb_gtf_feature_exon:+--sjdbGTFfeatureExon "${par_sjdb_gtf_feature_exon}"} \ - ${par_sjdb_gtf_tag_exon_parent_transcript:+--sjdbGTFtag_exon_parent_transcript "${par_sjdb_gtf_tag_exon_parent_transcript}"} \ - ${par_sjdb_gtf_tag_exon_parent_gene:+--sjdbGTFtag_exon_parent_gene "${par_sjdb_gtf_tag_exon_parent_gene}"} \ - ${par_sjdb_gtf_tag_exon_parent_geneName:+--sjdbGTFtag_exon_parent_geneName "${par_sjdb_gtf_tag_exon_parent_geneName}"} \ - ${par_sjdb_gtf_tag_exon_parent_geneType:+--sjdbGTFtag_exon_parent_geneType "${sjdbGTFtag_exon_parent_geneType}"} \ + ${par_sjdb_gtf_tag_exon_parent_transcript:+--sjdbGTFtagExonParentTranscript "${par_sjdb_gtf_tag_exon_parent_transcript}"} \ + ${par_sjdb_gtf_tag_exon_parent_gene:+--sjdbGTFtagExonParentGene "${par_sjdb_gtf_tag_exon_parent_gene}"} \ + ${sjdb_gtf_tag_exon_parent_gene_name:+--sjdbGTFtagExonParentGeneName "${sjdb_gtf_tag_exon_parent_gene_name}"} \ + ${sjdb_gtf_tag_exon_parent_gene_type:+--sjdbGTFtagExonParentGeneType "${sjdb_gtf_tag_exon_parent_gene_type}"} \ ${par_limit_genome_generate_ram:+--limitGenomeGenerateRAM "${par_limit_genome_generate_ram}"} \ ${par_genome_chr_bin_nbits:+--genomeChrBinNbits "${par_genome_chr_bin_nbits}"} \ ${par_genome_sa_sparse_d:+--genomeSAsparseD "${par_genome_sa_sparse_d}"} \ diff --git a/src/star/star_genome_generate/test.sh b/src/star/star_genome_generate/test.sh index 681f3494..efec575b 100644 --- a/src/star/star_genome_generate/test.sh +++ b/src/star/star_genome_generate/test.sh @@ -44,5 +44,28 @@ grep -q "200" "star_index/chrLength.txt" || (echo "Chromosome length in file 'ch grep -q "chr1" "star_index/chrName.txt" || (echo "Chromosome name in file 'chrName.txt' is incorrect! " && exit 1) grep -q "chr1 200" "star_index/chrNameLength.txt" || (echo "Chromosome name in file 'chrNameLength.txt' is incorrect! " && exit 1) +echo "> Test optional parameters" +"$meta_executable" \ + ${meta_cpus:+---cpus $meta_cpus} \ + --index "star_index/" \ + --genome_fasta_files "genome.fasta" \ + --sjdb_gtf_file "genes.gtf" \ + --sjdb_overhang 100 \ + --genome_sa_index_nbases 4 \ + --sjdb_gtf_tag_exon_parent_transcript "transcript_id" \ + --sjdb_gtf_tag_exon_parent_gene "gene_id" \ + --sjdb_gtf_tag_exon_parent_gene_name "gene_name" \ + --sjdb_gtf_tag_exon_parent_gene_type "gene_type" \ + --genome_chr_bin_nbits 10 \ + --sjdb_gtf_feature_exon "exon" + +files=("Genome" "Log.out" "SA" "SAindex" "chrLength.txt" "chrName.txt" "chrNameLength.txt" "chrStart.txt" "exonGeTrInfo.tab" "exonInfo.tab" "geneInfo.tab" "genomeParameters.txt" "sjdbInfo.txt" "sjdbList.fromGTF.out.tab" "sjdbList.out.tab" "transcriptInfo.tab") + +echo ">> Check if output exists" +[ ! -d "star_index" ] && echo "Directory 'star_index' does not exist!" && exit 1 +for file in "${files[@]}"; do + [ ! -f "star_index/$file" ] && echo "File '$file' does not exist in 'star_index'." && exit 1 +done + echo ">>> Test finished successfully" exit 0 diff --git a/target/executable/agat/agat_convert_bed2gff/.config.vsh.yaml b/target/executable/agat/agat_convert_bed2gff/.config.vsh.yaml index dc5fffeb..0e5072fd 100644 --- a/target/executable/agat/agat_convert_bed2gff/.config.vsh.yaml +++ b/target/executable/agat/agat_convert_bed2gff/.config.vsh.yaml @@ -133,6 +133,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -234,16 +237,16 @@ build_info: engine: "docker|native" output: "target/executable/agat/agat_convert_bed2gff" executable: "target/executable/agat/agat_convert_bed2gff/agat_convert_bed2gff" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/agat/agat_convert_bed2gff/agat_convert_bed2gff b/target/executable/agat/agat_convert_bed2gff/agat_convert_bed2gff index 21a9a3d6..a04c9e6a 100755 --- a/target/executable/agat/agat_convert_bed2gff/agat_convert_bed2gff +++ b/target/executable/agat/agat_convert_bed2gff/agat_convert_bed2gff @@ -2,7 +2,7 @@ # agat_convert_bed2gff main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,69 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "agat_convert_bed2gff main" - echo "" - echo "The script takes a bed file as input, and will translate it in gff format. The" - echo "BED format is described here The script converts 0-based, half-open [start-1," - echo "end) bed file to 1-based, closed [start, end] General Feature Format v3 (GFF3)." - echo "" - echo "Inputs:" - echo " --bed" - echo " type: file, required parameter, file must exist" - echo " example: input.bed" - echo " Input bed file that will be converted." - echo "" - echo "Outputs:" - echo " -o, --out, --outfile, --gff, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.gff" - echo " Output GFF file. If no output file is specified, the output will be" - echo " written to STDOUT." - echo "" - echo "Arguments:" - echo " --source" - echo " type: string" - echo " example: Stringtie" - echo " The source informs about the tool used to produce the data and is stored" - echo " in 2nd field of a gff file. Example: Stringtie, Maker, Augustus, etc." - echo " [default: data]" - echo "" - echo " --primary_tag" - echo " type: string" - echo " example: gene" - echo " The primary_tag corresponds to the data type and is stored in 3rd field" - echo " of a gff file. Example: gene, mRNA, CDS, etc. [default: gene]" - echo "" - echo " --inflate_off" - echo " type: boolean_true" - echo " By default we inflate the block fields (blockCount, blockSizes," - echo " blockStarts) to create subfeatures of the main feature (primary_tag)." - echo " The type of subfeature created is based on the inflate_type parameter." - echo " If you do not want this inflating behaviour you can deactivate it by" - echo " using the --inflate_off option." - echo "" - echo " --inflate_type" - echo " type: string" - echo " example: exon" - echo " Feature type (3rd column in gff) created when inflate parameter" - echo " activated [default: exon]." - echo "" - echo " --verbose" - echo " type: boolean_true" - echo " add verbosity" - echo "" - echo " -c, --config" - echo " type: file, file must exist" - echo " example: custom_agat_config.yaml" - echo " Input agat config file. By default AGAT takes as input agat_config.yaml" - echo " file from the working directory if any, otherwise it takes the orignal" - echo " agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally" - echo " type: \"agat config --expose\". The --config option gives you the" - echo " possibility to use your own AGAT config file (located elsewhere or named" - echo " differently)." -} # initialise variables VIASH_MODE='run' @@ -515,9 +452,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t LABEL org.opencontainers.image.authors="Leïla Paquay" LABEL org.opencontainers.image.description="Companion container for running component agat agat_convert_bed2gff" -LABEL org.opencontainers.image.created="2024-12-03T10:33:51Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:38Z" LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -632,6 +569,95 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "agat_convert_bed2gff main" + echo "" + echo "The script takes a bed file as input, and will translate it in gff format. The" + echo "BED format is described here The script converts 0-based, half-open [start-1," + echo "end) bed file to 1-based, closed [start, end] General Feature Format v3 (GFF3)." + echo "" + echo "Inputs:" + echo " --bed" + echo " type: file, required parameter, file must exist" + echo " example: input.bed" + echo " Input bed file that will be converted." + echo "" + echo "Outputs:" + echo " -o, --out, --outfile, --gff, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.gff" + echo " Output GFF file. If no output file is specified, the output will be" + echo " written to STDOUT." + echo "" + echo "Arguments:" + echo " --source" + echo " type: string" + echo " example: Stringtie" + echo " The source informs about the tool used to produce the data and is stored" + echo " in 2nd field of a gff file. Example: Stringtie, Maker, Augustus, etc." + echo " [default: data]" + echo "" + echo " --primary_tag" + echo " type: string" + echo " example: gene" + echo " The primary_tag corresponds to the data type and is stored in 3rd field" + echo " of a gff file. Example: gene, mRNA, CDS, etc. [default: gene]" + echo "" + echo " --inflate_off" + echo " type: boolean_true" + echo " By default we inflate the block fields (blockCount, blockSizes," + echo " blockStarts) to create subfeatures of the main feature (primary_tag)." + echo " The type of subfeature created is based on the inflate_type parameter." + echo " If you do not want this inflating behaviour you can deactivate it by" + echo " using the --inflate_off option." + echo "" + echo " --inflate_type" + echo " type: string" + echo " example: exon" + echo " Feature type (3rd column in gff) created when inflate parameter" + echo " activated [default: exon]." + echo "" + echo " --verbose" + echo " type: boolean_true" + echo " add verbosity" + echo "" + echo " -c, --config" + echo " type: file, file must exist" + echo " example: custom_agat_config.yaml" + echo " Input agat config file. By default AGAT takes as input agat_config.yaml" + echo " file from the working directory if any, otherwise it takes the orignal" + echo " agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally" + echo " type: \"agat config --expose\". The --config option gives you the" + echo " possibility to use your own AGAT config file (located elsewhere or named" + echo " differently)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/agat/agat_convert_embl2gff/.config.vsh.yaml b/target/executable/agat/agat_convert_embl2gff/.config.vsh.yaml index c503b95a..2ecc8e80 100644 --- a/target/executable/agat/agat_convert_embl2gff/.config.vsh.yaml +++ b/target/executable/agat/agat_convert_embl2gff/.config.vsh.yaml @@ -123,6 +123,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -224,16 +227,16 @@ build_info: engine: "docker|native" output: "target/executable/agat/agat_convert_embl2gff" executable: "target/executable/agat/agat_convert_embl2gff/agat_convert_embl2gff" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/agat/agat_convert_embl2gff/agat_convert_embl2gff b/target/executable/agat/agat_convert_embl2gff/agat_convert_embl2gff index 034dd7dd..a4e89183 100755 --- a/target/executable/agat/agat_convert_embl2gff/agat_convert_embl2gff +++ b/target/executable/agat/agat_convert_embl2gff/agat_convert_embl2gff @@ -2,7 +2,7 @@ # agat_convert_embl2gff main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,59 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "agat_convert_embl2gff main" - echo "" - echo "The script takes an EMBL file as input, and will translate it in gff format." - echo "" - echo "Inputs:" - echo " --embl" - echo " type: file, required parameter, file must exist" - echo " example: input.embl" - echo " Input EMBL file that will be read." - echo "" - echo "Outputs:" - echo " -o, --out, --outfile, --gff, --output" - echo " type: file, output, file must exist" - echo " example: output.gff" - echo " Output GFF file. If no output file is specified, the output will be" - echo " written to STDOUT." - echo "" - echo "Arguments:" - echo " --emblmygff3" - echo " type: boolean_true" - echo " Means that the EMBL flat file comes from the EMBLmyGFF3 software. This" - echo " is an EMBL format dedicated for submission and contains particularity to" - echo " deal with. This parameter is needed to get a proper sequence id in the" - echo " GFF3 from an embl made with EMBLmyGFF3." - echo "" - echo " --pt, -t, --primary_tag" - echo " type: string, multiple values allowed" - echo " example: tag1;tag2" - echo " List of \"primary tag\". Useful to discard or keep specific features." - echo " Multiple tags must be comma-separated." - echo "" - echo " -d, --discard" - echo " type: boolean_true" - echo " Means that primary tags provided by the option \"primary_tag\" will be" - echo " discarded." - echo "" - echo " -k, --keep" - echo " type: boolean_true" - echo " Means that only primary tags provided by the option \"primary_tag\" will" - echo " be kept." - echo "" - echo " -c, --config" - echo " type: file, file must exist" - echo " example: custom_agat_config.yaml" - echo " Input agat config file. By default AGAT takes as input agat_config.yaml" - echo " file from the working directory if any, otherwise it takes the original" - echo " agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally" - echo " type: \"agat config --expose\". The --config option gives you the" - echo " possibility to use your own AGAT config file (located elsewhere or named" - echo " differently)." -} # initialise variables VIASH_MODE='run' @@ -505,9 +452,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t LABEL org.opencontainers.image.authors="Leïla Paquay" LABEL org.opencontainers.image.description="Companion container for running component agat agat_convert_embl2gff" -LABEL org.opencontainers.image.created="2024-12-03T10:33:53Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:37Z" LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -622,6 +569,85 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "agat_convert_embl2gff main" + echo "" + echo "The script takes an EMBL file as input, and will translate it in gff format." + echo "" + echo "Inputs:" + echo " --embl" + echo " type: file, required parameter, file must exist" + echo " example: input.embl" + echo " Input EMBL file that will be read." + echo "" + echo "Outputs:" + echo " -o, --out, --outfile, --gff, --output" + echo " type: file, output, file must exist" + echo " example: output.gff" + echo " Output GFF file. If no output file is specified, the output will be" + echo " written to STDOUT." + echo "" + echo "Arguments:" + echo " --emblmygff3" + echo " type: boolean_true" + echo " Means that the EMBL flat file comes from the EMBLmyGFF3 software. This" + echo " is an EMBL format dedicated for submission and contains particularity to" + echo " deal with. This parameter is needed to get a proper sequence id in the" + echo " GFF3 from an embl made with EMBLmyGFF3." + echo "" + echo " --pt, -t, --primary_tag" + echo " type: string, multiple values allowed" + echo " example: tag1;tag2" + echo " List of \"primary tag\". Useful to discard or keep specific features." + echo " Multiple tags must be comma-separated." + echo "" + echo " -d, --discard" + echo " type: boolean_true" + echo " Means that primary tags provided by the option \"primary_tag\" will be" + echo " discarded." + echo "" + echo " -k, --keep" + echo " type: boolean_true" + echo " Means that only primary tags provided by the option \"primary_tag\" will" + echo " be kept." + echo "" + echo " -c, --config" + echo " type: file, file must exist" + echo " example: custom_agat_config.yaml" + echo " Input agat config file. By default AGAT takes as input agat_config.yaml" + echo " file from the working directory if any, otherwise it takes the original" + echo " agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally" + echo " type: \"agat config --expose\". The --config option gives you the" + echo " possibility to use your own AGAT config file (located elsewhere or named" + echo " differently)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/agat/agat_convert_genscan2gff/.config.vsh.yaml b/target/executable/agat/agat_convert_genscan2gff/.config.vsh.yaml index 6620ba0c..ebc23a02 100644 --- a/target/executable/agat/agat_convert_genscan2gff/.config.vsh.yaml +++ b/target/executable/agat/agat_convert_genscan2gff/.config.vsh.yaml @@ -127,6 +127,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -229,16 +232,16 @@ build_info: engine: "docker|native" output: "target/executable/agat/agat_convert_genscan2gff" executable: "target/executable/agat/agat_convert_genscan2gff/agat_convert_genscan2gff" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/agat/agat_convert_genscan2gff/agat_convert_genscan2gff b/target/executable/agat/agat_convert_genscan2gff/agat_convert_genscan2gff index af5b1266..1d16e2e2 100755 --- a/target/executable/agat/agat_convert_genscan2gff/agat_convert_genscan2gff +++ b/target/executable/agat/agat_convert_genscan2gff/agat_convert_genscan2gff @@ -2,7 +2,7 @@ # agat_convert_genscan2gff main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,68 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "agat_convert_genscan2gff main" - echo "" - echo "The script takes a GENSCAN file as input, and will translate it in gff" - echo "format. The GENSCAN format is described" - echo "[here](http://genome.crg.es/courses/Bioinformatics2003_genefinding/results/genscan.html)." - echo "" - echo "**Known problem**" - echo "" - echo "You must have submited only DNA sequence, without any header!! Indeed the tool" - echo "expects only DNA" - echo "sequences and does not crash/warn if an header is submited along the" - echo "sequence. e.g If you have an header \">seq\" s-e-q are seen as the 3 first" - echo "nucleotides of the sequence. Then all prediction location are shifted" - echo "accordingly. (checked only on the [online" - echo "version](http://argonaute.mit.edu/GENSCAN.html)." - echo "I don't know if there is the same problem elsewhere.)" - echo "" - echo "Inputs:" - echo " -g, --genscan" - echo " type: file, required parameter, file must exist" - echo " Input genscan bed file that will be converted." - echo "" - echo "Outputs:" - echo " -o, --out, --outfile, --gff, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.gff" - echo " Output GFF file. If no output file is specified, the output will be" - echo " written to STDOUT." - echo "" - echo "Arguments:" - echo " --source" - echo " type: string" - echo " example: Stringtie" - echo " The source informs about the tool used to produce the data and is stored" - echo " in 2nd field of a gff file. Example: Stringtie, Maker, Augustus, etc." - echo " [default: data]" - echo "" - echo " --primary_tag" - echo " type: string" - echo " example: gene" - echo " The primary_tag corresponds to the data type and is stored in 3rd field" - echo " of a gff file. Example: gene, mRNA, CDS, etc. [default: gene]" - echo "" - echo " --inflate_type" - echo " type: string" - echo " example: exon" - echo " Feature type (3rd column in gff) created when inflate parameter" - echo " activated [default: exon]." - echo "" - echo " --verbose" - echo " type: boolean_true" - echo " add verbosity" - echo "" - echo " -c, --config" - echo " type: file, file must exist" - echo " example: custom_agat_config.yaml" - echo " AGAT config file. By default AGAT takes the original agat_config.yaml" - echo " shipped with AGAT. The \`--config\` option gives you the possibility to" - echo " use your own AGAT config file (located elsewhere or named differently)." -} # initialise variables VIASH_MODE='run' @@ -514,9 +452,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t LABEL org.opencontainers.image.authors="Leïla Paquay" LABEL org.opencontainers.image.description="Companion container for running component agat agat_convert_genscan2gff" -LABEL org.opencontainers.image.created="2024-12-03T10:33:54Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:37Z" LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -631,6 +569,94 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "agat_convert_genscan2gff main" + echo "" + echo "The script takes a GENSCAN file as input, and will translate it in gff" + echo "format. The GENSCAN format is described" + echo "[here](http://genome.crg.es/courses/Bioinformatics2003_genefinding/results/genscan.html)." + echo "" + echo "**Known problem**" + echo "" + echo "You must have submited only DNA sequence, without any header!! Indeed the tool" + echo "expects only DNA" + echo "sequences and does not crash/warn if an header is submited along the" + echo "sequence. e.g If you have an header \">seq\" s-e-q are seen as the 3 first" + echo "nucleotides of the sequence. Then all prediction location are shifted" + echo "accordingly. (checked only on the [online" + echo "version](http://argonaute.mit.edu/GENSCAN.html)." + echo "I don't know if there is the same problem elsewhere.)" + echo "" + echo "Inputs:" + echo " -g, --genscan" + echo " type: file, required parameter, file must exist" + echo " Input genscan bed file that will be converted." + echo "" + echo "Outputs:" + echo " -o, --out, --outfile, --gff, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.gff" + echo " Output GFF file. If no output file is specified, the output will be" + echo " written to STDOUT." + echo "" + echo "Arguments:" + echo " --source" + echo " type: string" + echo " example: Stringtie" + echo " The source informs about the tool used to produce the data and is stored" + echo " in 2nd field of a gff file. Example: Stringtie, Maker, Augustus, etc." + echo " [default: data]" + echo "" + echo " --primary_tag" + echo " type: string" + echo " example: gene" + echo " The primary_tag corresponds to the data type and is stored in 3rd field" + echo " of a gff file. Example: gene, mRNA, CDS, etc. [default: gene]" + echo "" + echo " --inflate_type" + echo " type: string" + echo " example: exon" + echo " Feature type (3rd column in gff) created when inflate parameter" + echo " activated [default: exon]." + echo "" + echo " --verbose" + echo " type: boolean_true" + echo " add verbosity" + echo "" + echo " -c, --config" + echo " type: file, file must exist" + echo " example: custom_agat_config.yaml" + echo " AGAT config file. By default AGAT takes the original agat_config.yaml" + echo " shipped with AGAT. The \`--config\` option gives you the possibility to" + echo " use your own AGAT config file (located elsewhere or named differently)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/agat/agat_convert_mfannot2gff/.config.vsh.yaml b/target/executable/agat/agat_convert_mfannot2gff/.config.vsh.yaml index e04e9331..75a84f3d 100644 --- a/target/executable/agat/agat_convert_mfannot2gff/.config.vsh.yaml +++ b/target/executable/agat/agat_convert_mfannot2gff/.config.vsh.yaml @@ -83,6 +83,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -90,7 +93,7 @@ keywords: - "gene annotations" - "GFF" - "Mfannot" -license: "GPL-3." +license: "GPL-3.0" references: doi: - "10.5281/zenodo.3552717" @@ -185,16 +188,16 @@ build_info: engine: "docker|native" output: "target/executable/agat/agat_convert_mfannot2gff" executable: "target/executable/agat/agat_convert_mfannot2gff/agat_convert_mfannot2gff" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/agat/agat_convert_mfannot2gff/agat_convert_mfannot2gff b/target/executable/agat/agat_convert_mfannot2gff/agat_convert_mfannot2gff index e25f3622..d451fd0c 100755 --- a/target/executable/agat/agat_convert_mfannot2gff/agat_convert_mfannot2gff +++ b/target/executable/agat/agat_convert_mfannot2gff/agat_convert_mfannot2gff @@ -2,7 +2,7 @@ # agat_convert_mfannot2gff main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,34 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "agat_convert_mfannot2gff main" - echo "" - echo "Conversion utility for MFannot \"masterfile\" annotation produced by the" - echo "[MFannot pipeline](http://megasun.bch.umontreal.ca/RNAweasel/). Reports" - echo "GFF3 format." - echo "" - echo "Inputs:" - echo " -m, -i, --mfannot" - echo " type: file, required parameter, file must exist" - echo " example: input.mfannot" - echo " The mfannot input file." - echo "" - echo "Outputs:" - echo " -g, -o, --gff" - echo " type: file, required parameter, output, file must exist" - echo " example: output.gff" - echo " The GFF output file." - echo "" - echo "Arguments:" - echo " -c, --config" - echo " type: file, file must exist" - echo " example: custom_agat_config.yaml" - echo " AGAT config file. By default AGAT takes the original agat_config.yaml" - echo " shipped with AGAT. The \`--config\` option gives you the possibility to" - echo " use your own AGAT config file (located elsewhere or named differently)." -} # initialise variables VIASH_MODE='run' @@ -480,9 +452,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t LABEL org.opencontainers.image.authors="Leïla Paquay" LABEL org.opencontainers.image.description="Companion container for running component agat agat_convert_mfannot2gff" -LABEL org.opencontainers.image.created="2024-12-03T10:34:05Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:39Z" LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -597,6 +569,60 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "agat_convert_mfannot2gff main" + echo "" + echo "Conversion utility for MFannot \"masterfile\" annotation produced by the" + echo "[MFannot pipeline](http://megasun.bch.umontreal.ca/RNAweasel/). Reports" + echo "GFF3 format." + echo "" + echo "Inputs:" + echo " -m, -i, --mfannot" + echo " type: file, required parameter, file must exist" + echo " example: input.mfannot" + echo " The mfannot input file." + echo "" + echo "Outputs:" + echo " -g, -o, --gff" + echo " type: file, required parameter, output, file must exist" + echo " example: output.gff" + echo " The GFF output file." + echo "" + echo "Arguments:" + echo " -c, --config" + echo " type: file, file must exist" + echo " example: custom_agat_config.yaml" + echo " AGAT config file. By default AGAT takes the original agat_config.yaml" + echo " shipped with AGAT. The \`--config\` option gives you the possibility to" + echo " use your own AGAT config file (located elsewhere or named differently)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/agat/agat_convert_sp_gff2gtf/.config.vsh.yaml b/target/executable/agat/agat_convert_sp_gff2gtf/.config.vsh.yaml index adc17a57..46b7793a 100644 --- a/target/executable/agat/agat_convert_sp_gff2gtf/.config.vsh.yaml +++ b/target/executable/agat/agat_convert_sp_gff2gtf/.config.vsh.yaml @@ -126,6 +126,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -227,16 +230,16 @@ build_info: engine: "docker|native" output: "target/executable/agat/agat_convert_sp_gff2gtf" executable: "target/executable/agat/agat_convert_sp_gff2gtf/agat_convert_sp_gff2gtf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/agat/agat_convert_sp_gff2gtf/agat_convert_sp_gff2gtf b/target/executable/agat/agat_convert_sp_gff2gtf/agat_convert_sp_gff2gtf index 69c8d41a..81c39676 100755 --- a/target/executable/agat/agat_convert_sp_gff2gtf/agat_convert_sp_gff2gtf +++ b/target/executable/agat/agat_convert_sp_gff2gtf/agat_convert_sp_gff2gtf @@ -2,7 +2,7 @@ # agat_convert_sp_gff2gtf main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,73 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "agat_convert_sp_gff2gtf main" - echo "" - echo "The script aims to convert any GTF/GFF file into a proper GTF file. Full" - echo "information about the format can be found here:" - echo "https://agat.readthedocs.io/en/latest/gxf.html You can choose among 7" - echo "different GTF types (1, 2, 2.1, 2.2, 2.5, 3 or relax). Depending the" - echo "version selected the script will filter out the features that are not" - echo "accepted. For GTF2.5 and 3, every level1 feature (e.g nc_gene" - echo "pseudogene) will be converted into gene feature and every level2 feature" - echo "(e.g mRNA ncRNA) will be converted into transcript feature. Using the" - echo "\"relax\" option you will produce a GTF-like output keeping all original" - echo "feature types (3rd column). No modification will occur e.g. mRNA to" - echo "transcript." - echo "" - echo "To be fully GTF compliant all feature have a gene_id and a transcript_id" - echo "attribute. The gene_id is unique identifier for the genomic source of" - echo "the transcript, which is used to group transcripts into genes. The" - echo "transcript_id is a unique identifier for the predicted transcript, which" - echo "is used to group features into transcripts." - echo "" - echo "Inputs:" - echo " -i, --gff" - echo " type: file, required parameter, file must exist" - echo " example: input.gff" - echo " Input GFF/GTF file that will be read" - echo "" - echo "Outputs:" - echo " -o, --out, --outfile, --gtf, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.gtf" - echo " Output GTF file. If no output file is specified, the output will be" - echo " written to STDOUT." - echo "" - echo "Arguments:" - echo " --gtf_version" - echo " type: string" - echo " example: 3" - echo " choices: [ relax, 1, 2, 2.1, 2.2, 2.5, 3 ]" - echo " Version of the GTF output (1,2,2.1,2.2,2.5,3 or relax). Default value" - echo " from AGAT config file (relax for the default config). The script option" - echo " has the higher priority." - echo " * relax: all feature types are accepted." - echo " * GTF3 (9 feature types accepted): gene, transcript, exon, CDS," - echo " Selenocysteine, start_codon, stop_codon, three_prime_utr and" - echo " five_prime_utr." - echo " * GTF2.5 (8 feature types accepted): gene, transcript, exon, CDS, UTR," - echo " start_codon, stop_codon, Selenocysteine." - echo " * GTF2.2 (9 feature types accepted): CDS, start_codon, stop_codon," - echo " 5UTR, 3UTR, inter, inter_CNS, intron_CNS and exon." - echo " * GTF2.1 (6 feature types accepted): CDS, start_codon, stop_codon," - echo " exon, 5UTR, 3UTR." - echo " * GTF2 (4 feature types accepted): CDS, start_codon, stop_codon, exon." - echo " * GTF1 (5 feature types accepted): CDS, start_codon, stop_codon, exon," - echo " intron." - echo "" - echo " -c, --config" - echo " type: file, file must exist" - echo " example: custom_agat_config.yaml" - echo " Input agat config file. By default AGAT takes as input agat_config.yaml" - echo " file from the working directory if any, otherwise it takes the orignal" - echo " agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally" - echo " type: \"agat config --expose\". The --config option gives you the" - echo " possibility to use your own AGAT config file (located elsewhere or named" - echo " differently)." -} # initialise variables VIASH_MODE='run' @@ -519,9 +452,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t LABEL org.opencontainers.image.authors="Leïla Paquay" LABEL org.opencontainers.image.description="Companion container for running component agat agat_convert_sp_gff2gtf" -LABEL org.opencontainers.image.created="2024-12-03T10:33:52Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:37Z" LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -636,6 +569,99 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "agat_convert_sp_gff2gtf main" + echo "" + echo "The script aims to convert any GTF/GFF file into a proper GTF file. Full" + echo "information about the format can be found here:" + echo "https://agat.readthedocs.io/en/latest/gxf.html You can choose among 7" + echo "different GTF types (1, 2, 2.1, 2.2, 2.5, 3 or relax). Depending the" + echo "version selected the script will filter out the features that are not" + echo "accepted. For GTF2.5 and 3, every level1 feature (e.g nc_gene" + echo "pseudogene) will be converted into gene feature and every level2 feature" + echo "(e.g mRNA ncRNA) will be converted into transcript feature. Using the" + echo "\"relax\" option you will produce a GTF-like output keeping all original" + echo "feature types (3rd column). No modification will occur e.g. mRNA to" + echo "transcript." + echo "" + echo "To be fully GTF compliant all feature have a gene_id and a transcript_id" + echo "attribute. The gene_id is unique identifier for the genomic source of" + echo "the transcript, which is used to group transcripts into genes. The" + echo "transcript_id is a unique identifier for the predicted transcript, which" + echo "is used to group features into transcripts." + echo "" + echo "Inputs:" + echo " -i, --gff" + echo " type: file, required parameter, file must exist" + echo " example: input.gff" + echo " Input GFF/GTF file that will be read" + echo "" + echo "Outputs:" + echo " -o, --out, --outfile, --gtf, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.gtf" + echo " Output GTF file. If no output file is specified, the output will be" + echo " written to STDOUT." + echo "" + echo "Arguments:" + echo " --gtf_version" + echo " type: string" + echo " example: 3" + echo " choices: [ relax, 1, 2, 2.1, 2.2, 2.5, 3 ]" + echo " Version of the GTF output (1,2,2.1,2.2,2.5,3 or relax). Default value" + echo " from AGAT config file (relax for the default config). The script option" + echo " has the higher priority." + echo " * relax: all feature types are accepted." + echo " * GTF3 (9 feature types accepted): gene, transcript, exon, CDS," + echo " Selenocysteine, start_codon, stop_codon, three_prime_utr and" + echo " five_prime_utr." + echo " * GTF2.5 (8 feature types accepted): gene, transcript, exon, CDS, UTR," + echo " start_codon, stop_codon, Selenocysteine." + echo " * GTF2.2 (9 feature types accepted): CDS, start_codon, stop_codon," + echo " 5UTR, 3UTR, inter, inter_CNS, intron_CNS and exon." + echo " * GTF2.1 (6 feature types accepted): CDS, start_codon, stop_codon," + echo " exon, 5UTR, 3UTR." + echo " * GTF2 (4 feature types accepted): CDS, start_codon, stop_codon, exon." + echo " * GTF1 (5 feature types accepted): CDS, start_codon, stop_codon, exon," + echo " intron." + echo "" + echo " -c, --config" + echo " type: file, file must exist" + echo " example: custom_agat_config.yaml" + echo " Input agat config file. By default AGAT takes as input agat_config.yaml" + echo " file from the working directory if any, otherwise it takes the orignal" + echo " agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally" + echo " type: \"agat config --expose\". The --config option gives you the" + echo " possibility to use your own AGAT config file (located elsewhere or named" + echo " differently)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/agat/agat_convert_sp_gff2tsv/.config.vsh.yaml b/target/executable/agat/agat_convert_sp_gff2tsv/.config.vsh.yaml index 57b300b1..727d00b6 100644 --- a/target/executable/agat/agat_convert_sp_gff2tsv/.config.vsh.yaml +++ b/target/executable/agat/agat_convert_sp_gff2tsv/.config.vsh.yaml @@ -86,6 +86,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -187,16 +190,16 @@ build_info: engine: "docker|native" output: "target/executable/agat/agat_convert_sp_gff2tsv" executable: "target/executable/agat/agat_convert_sp_gff2tsv/agat_convert_sp_gff2tsv" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/agat/agat_convert_sp_gff2tsv/agat_convert_sp_gff2tsv b/target/executable/agat/agat_convert_sp_gff2tsv/agat_convert_sp_gff2tsv index bab0c23f..bf09bfe4 100755 --- a/target/executable/agat/agat_convert_sp_gff2tsv/agat_convert_sp_gff2tsv +++ b/target/executable/agat/agat_convert_sp_gff2tsv/agat_convert_sp_gff2tsv @@ -2,7 +2,7 @@ # agat_convert_sp_gff2tsv main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,38 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "agat_convert_sp_gff2tsv main" - echo "" - echo "The script aims to convert gtf/gff file into tabulated file. Attribute's" - echo "tags from the 9th column become column titles." - echo "" - echo "Inputs:" - echo " -f, --gff" - echo " type: file, required parameter, file must exist" - echo " example: input.gff" - echo " Input GTF/GFF file." - echo "" - echo "Outputs:" - echo " -o, --out, --outfile, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.gff" - echo " Output GFF file. If no output file is specified, the output will be" - echo " written to STDOUT." - echo "" - echo "Arguments:" - echo " -c, --config" - echo " type: file, file must exist" - echo " example: custom_agat_config.yaml" - echo " String - Input agat config file. By default AGAT takes as input" - echo " agat_config.yaml file from the working directory if any," - echo " otherwise it takes the orignal agat_config.yaml shipped with" - echo " AGAT. To get the agat_config.yaml locally type: \"agat config" - echo " --expose\". The --config option gives you the possibility to use" - echo " your own AGAT config file (located elsewhere or named" - echo " differently)." -} # initialise variables VIASH_MODE='run' @@ -484,9 +452,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t LABEL org.opencontainers.image.authors="Leïla Paquay" LABEL org.opencontainers.image.description="Companion container for running component agat agat_convert_sp_gff2tsv" -LABEL org.opencontainers.image.created="2024-12-03T10:33:52Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:38Z" LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -601,6 +569,64 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "agat_convert_sp_gff2tsv main" + echo "" + echo "The script aims to convert gtf/gff file into tabulated file. Attribute's" + echo "tags from the 9th column become column titles." + echo "" + echo "Inputs:" + echo " -f, --gff" + echo " type: file, required parameter, file must exist" + echo " example: input.gff" + echo " Input GTF/GFF file." + echo "" + echo "Outputs:" + echo " -o, --out, --outfile, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.gff" + echo " Output GFF file. If no output file is specified, the output will be" + echo " written to STDOUT." + echo "" + echo "Arguments:" + echo " -c, --config" + echo " type: file, file must exist" + echo " example: custom_agat_config.yaml" + echo " String - Input agat config file. By default AGAT takes as input" + echo " agat_config.yaml file from the working directory if any," + echo " otherwise it takes the orignal agat_config.yaml shipped with" + echo " AGAT. To get the agat_config.yaml locally type: \"agat config" + echo " --expose\". The --config option gives you the possibility to use" + echo " your own AGAT config file (located elsewhere or named" + echo " differently)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/agat/agat_convert_sp_gxf2gxf/.config.vsh.yaml b/target/executable/agat/agat_convert_sp_gxf2gxf/.config.vsh.yaml index 35c372fb..7af73d04 100644 --- a/target/executable/agat/agat_convert_sp_gxf2gxf/.config.vsh.yaml +++ b/target/executable/agat/agat_convert_sp_gxf2gxf/.config.vsh.yaml @@ -93,6 +93,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -194,16 +197,16 @@ build_info: engine: "docker|native" output: "target/executable/agat/agat_convert_sp_gxf2gxf" executable: "target/executable/agat/agat_convert_sp_gxf2gxf/agat_convert_sp_gxf2gxf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/agat/agat_convert_sp_gxf2gxf/agat_convert_sp_gxf2gxf b/target/executable/agat/agat_convert_sp_gxf2gxf/agat_convert_sp_gxf2gxf index 524ab4a6..2a6e8d10 100755 --- a/target/executable/agat/agat_convert_sp_gxf2gxf/agat_convert_sp_gxf2gxf +++ b/target/executable/agat/agat_convert_sp_gxf2gxf/agat_convert_sp_gxf2gxf @@ -2,7 +2,7 @@ # agat_convert_sp_gxf2gxf main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,47 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "agat_convert_sp_gxf2gxf main" - echo "" - echo "This script fixes and/or standardizes any GTF/GFF file into full sorted" - echo "GTF/GFF file. It AGAT parser removes duplicate features, fixes" - echo "duplicated IDs, adds missing ID and/or Parent attributes, deflates" - echo "factorized attributes (attributes with several parents are duplicated" - echo "with uniq ID), add missing features when possible (e.g. add exon if only" - echo "CDS described, add UTR if CDS and exon described), fix feature locations" - echo "(e.g. check exon is embedded in the parent features mRNA, gene), etc..." - echo "" - echo "All AGAT's scripts with the _sp_ prefix use the AGAT parser, before to" - echo "perform any supplementary task. So, it is not necessary to run this" - echo "script prior the use of any other _sp_ script." - echo "" - echo "Inputs:" - echo " -g, --gtf, --gff, --gxf" - echo " type: file, required parameter, file must exist" - echo " example: input.gff" - echo " String - Input GTF/GFF file. Compressed file with .gz extension is" - echo " accepted." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.gff" - echo " String - Output GFF file. If no output file is specified, the output" - echo " will be written to STDOUT." - echo "" - echo "Arguments:" - echo " -c, --config" - echo " type: file, file must exist" - echo " example: custom_agat_config.yaml" - echo " String - Input agat config file. By default AGAT takes as input" - echo " agat_config.yaml file from the working directory if any, otherwise it" - echo " takes the original agat_config.yaml shipped with AGAT. To get the" - echo " agat_config.yaml locally type: \"agat config --expose\". The --config" - echo " option gives you the possibility to use your own AGAT config file" - echo " (located elsewhere or named differently)." -} # initialise variables VIASH_MODE='run' @@ -493,9 +452,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t LABEL org.opencontainers.image.authors="Leïla Paquay" LABEL org.opencontainers.image.description="Companion container for running component agat agat_convert_sp_gxf2gxf" -LABEL org.opencontainers.image.created="2024-12-03T10:34:05Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:39Z" LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -610,6 +569,73 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "agat_convert_sp_gxf2gxf main" + echo "" + echo "This script fixes and/or standardizes any GTF/GFF file into full sorted" + echo "GTF/GFF file. It AGAT parser removes duplicate features, fixes" + echo "duplicated IDs, adds missing ID and/or Parent attributes, deflates" + echo "factorized attributes (attributes with several parents are duplicated" + echo "with uniq ID), add missing features when possible (e.g. add exon if only" + echo "CDS described, add UTR if CDS and exon described), fix feature locations" + echo "(e.g. check exon is embedded in the parent features mRNA, gene), etc..." + echo "" + echo "All AGAT's scripts with the _sp_ prefix use the AGAT parser, before to" + echo "perform any supplementary task. So, it is not necessary to run this" + echo "script prior the use of any other _sp_ script." + echo "" + echo "Inputs:" + echo " -g, --gtf, --gff, --gxf" + echo " type: file, required parameter, file must exist" + echo " example: input.gff" + echo " String - Input GTF/GFF file. Compressed file with .gz extension is" + echo " accepted." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.gff" + echo " String - Output GFF file. If no output file is specified, the output" + echo " will be written to STDOUT." + echo "" + echo "Arguments:" + echo " -c, --config" + echo " type: file, file must exist" + echo " example: custom_agat_config.yaml" + echo " String - Input agat config file. By default AGAT takes as input" + echo " agat_config.yaml file from the working directory if any, otherwise it" + echo " takes the original agat_config.yaml shipped with AGAT. To get the" + echo " agat_config.yaml locally type: \"agat config --expose\". The --config" + echo " option gives you the possibility to use your own AGAT config file" + echo " (located elsewhere or named differently)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/agat/agat_sp_add_introns/.config.vsh.yaml b/target/executable/agat/agat_sp_add_introns/.config.vsh.yaml index 096aa701..a6fe77a7 100644 --- a/target/executable/agat/agat_sp_add_introns/.config.vsh.yaml +++ b/target/executable/agat/agat_sp_add_introns/.config.vsh.yaml @@ -84,6 +84,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -185,16 +188,16 @@ build_info: engine: "docker|native" output: "target/executable/agat/agat_sp_add_introns" executable: "target/executable/agat/agat_sp_add_introns/agat_sp_add_introns" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/agat/agat_sp_add_introns/agat_sp_add_introns b/target/executable/agat/agat_sp_add_introns/agat_sp_add_introns index 17894379..4d6be9e8 100755 --- a/target/executable/agat/agat_sp_add_introns/agat_sp_add_introns +++ b/target/executable/agat/agat_sp_add_introns/agat_sp_add_introns @@ -2,7 +2,7 @@ # agat_sp_add_introns main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,33 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "agat_sp_add_introns main" - echo "" - echo "Add intronic elements to a gtf/gff file without intron features." - echo "" - echo "Inputs:" - echo " -f, --ref, --reffile, --gff" - echo " type: file, required parameter, file must exist" - echo " example: input.gff" - echo " Input GTF/GFF file." - echo "" - echo "Outputs:" - echo " -o, --out, --outfile, --gtf, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.gff" - echo " Output GFF3 file." - echo "" - echo "Arguments:" - echo " -c, --config" - echo " type: file, file must exist" - echo " example: custom_agat_config.yaml" - echo " AGAT config file. By default AGAT takes the original agat_config.yaml" - echo " shipped with AGAT. The \`--config\` option" - echo " gives you the possibility to use your own AGAT config file (located" - echo " elsewhere or named differently)." -} # initialise variables VIASH_MODE='run' @@ -479,9 +452,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t LABEL org.opencontainers.image.authors="Leïla Paquay" LABEL org.opencontainers.image.description="Companion container for running component agat agat_sp_add_introns" -LABEL org.opencontainers.image.created="2024-12-03T10:33:52Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:38Z" LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -596,6 +569,59 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "agat_sp_add_introns main" + echo "" + echo "Add intronic elements to a gtf/gff file without intron features." + echo "" + echo "Inputs:" + echo " -f, --ref, --reffile, --gff" + echo " type: file, required parameter, file must exist" + echo " example: input.gff" + echo " Input GTF/GFF file." + echo "" + echo "Outputs:" + echo " -o, --out, --outfile, --gtf, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.gff" + echo " Output GFF3 file." + echo "" + echo "Arguments:" + echo " -c, --config" + echo " type: file, file must exist" + echo " example: custom_agat_config.yaml" + echo " AGAT config file. By default AGAT takes the original agat_config.yaml" + echo " shipped with AGAT. The \`--config\` option" + echo " gives you the possibility to use your own AGAT config file (located" + echo " elsewhere or named differently)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/agat/agat_sp_filter_feature_from_kill_list/.config.vsh.yaml b/target/executable/agat/agat_sp_filter_feature_from_kill_list/.config.vsh.yaml index 2b8e664b..b9ba57ae 100644 --- a/target/executable/agat/agat_sp_filter_feature_from_kill_list/.config.vsh.yaml +++ b/target/executable/agat/agat_sp_filter_feature_from_kill_list/.config.vsh.yaml @@ -133,6 +133,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -235,16 +238,16 @@ build_info: engine: "docker|native" output: "target/executable/agat/agat_sp_filter_feature_from_kill_list" executable: "target/executable/agat/agat_sp_filter_feature_from_kill_list/agat_sp_filter_feature_from_kill_list" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/agat/agat_sp_filter_feature_from_kill_list/agat_sp_filter_feature_from_kill_list b/target/executable/agat/agat_sp_filter_feature_from_kill_list/agat_sp_filter_feature_from_kill_list index 9ca9bdd9..efd68e17 100755 --- a/target/executable/agat/agat_sp_filter_feature_from_kill_list/agat_sp_filter_feature_from_kill_list +++ b/target/executable/agat/agat_sp_filter_feature_from_kill_list/agat_sp_filter_feature_from_kill_list @@ -2,7 +2,7 @@ # agat_sp_filter_feature_from_kill_list main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,71 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "agat_sp_filter_feature_from_kill_list main" - echo "" - echo "Remove features based on a kill list. The default behaviour is to look at the" - echo "features's ID." - echo "If the feature has an ID (case insensitive) listed among the kill list it will" - echo "be removed." - echo "Removing a level1 or level2 feature will automatically remove all linked" - echo "subfeatures, and" - echo "removing all children of a feature will automatically remove this feature too." - echo "" - echo "Inputs:" - echo " -f, --ref, --reffile, --gff" - echo " type: file, required parameter, file must exist" - echo " Input GFF3 file that will be read." - echo "" - echo " --kl, --kill_list" - echo " type: file, required parameter, file must exist" - echo " example: kill_list.txt" - echo " Text file containing the kill list. One value per line." - echo "" - echo "Outputs:" - echo " -o, --out, --output" - echo " type: file, required parameter, output, file must exist" - echo " Path to the output GFF file that contains filtered features." - echo "" - echo "Arguments:" - echo " -p, -l, --type" - echo " type: string, multiple values allowed" - echo " Primary tag option, case insensitive, list. Allow to specify the feature" - echo " types that" - echo " will be handled." - echo " You can specify a specific feature by giving its primary tag name" - echo " (column 3) as:" - echo " * cds" - echo " * Gene" - echo " * mRNA" - echo " You can specify directly all the feature of a particular" - echo " level:" - echo " * level2=mRNA,ncRNA,tRNA,etc" - echo " * level3=CDS,exon,UTR,etc." - echo " By default all features are taken into account. Fill the option with the" - echo " value \"all\" will" - echo " have the same behaviour." - echo "" - echo " -a, --attribute" - echo " type: string" - echo " example: ID" - echo " Attribute tag to specify the attribute to analyse. Case sensitive." - echo " Default: ID" - echo "" - echo " -c, --config" - echo " type: file, file must exist" - echo " example: custom_agat_config.yaml" - echo " AGAT config file. By default AGAT takes the original agat_config.yaml" - echo " shipped with AGAT." - echo " The \`--config\` option gives you the possibility to use your own AGAT" - echo " config file (located" - echo " elsewhere or named differently)." - echo "" - echo " -v, --verbose" - echo " type: boolean_true" - echo " Verbose option for debugging purpose." -} # initialise variables VIASH_MODE='run' @@ -517,9 +452,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t LABEL org.opencontainers.image.authors="Leïla Paquay" LABEL org.opencontainers.image.description="Companion container for running component agat agat_sp_filter_feature_from_kill_list" -LABEL org.opencontainers.image.created="2024-12-03T10:33:53Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:39Z" LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -634,6 +569,97 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "agat_sp_filter_feature_from_kill_list main" + echo "" + echo "Remove features based on a kill list. The default behaviour is to look at the" + echo "features's ID." + echo "If the feature has an ID (case insensitive) listed among the kill list it will" + echo "be removed." + echo "Removing a level1 or level2 feature will automatically remove all linked" + echo "subfeatures, and" + echo "removing all children of a feature will automatically remove this feature too." + echo "" + echo "Inputs:" + echo " -f, --ref, --reffile, --gff" + echo " type: file, required parameter, file must exist" + echo " Input GFF3 file that will be read." + echo "" + echo " --kl, --kill_list" + echo " type: file, required parameter, file must exist" + echo " example: kill_list.txt" + echo " Text file containing the kill list. One value per line." + echo "" + echo "Outputs:" + echo " -o, --out, --output" + echo " type: file, required parameter, output, file must exist" + echo " Path to the output GFF file that contains filtered features." + echo "" + echo "Arguments:" + echo " -p, -l, --type" + echo " type: string, multiple values allowed" + echo " Primary tag option, case insensitive, list. Allow to specify the feature" + echo " types that" + echo " will be handled." + echo " You can specify a specific feature by giving its primary tag name" + echo " (column 3) as:" + echo " * cds" + echo " * Gene" + echo " * mRNA" + echo " You can specify directly all the feature of a particular" + echo " level:" + echo " * level2=mRNA,ncRNA,tRNA,etc" + echo " * level3=CDS,exon,UTR,etc." + echo " By default all features are taken into account. Fill the option with the" + echo " value \"all\" will" + echo " have the same behaviour." + echo "" + echo " -a, --attribute" + echo " type: string" + echo " example: ID" + echo " Attribute tag to specify the attribute to analyse. Case sensitive." + echo " Default: ID" + echo "" + echo " -c, --config" + echo " type: file, file must exist" + echo " example: custom_agat_config.yaml" + echo " AGAT config file. By default AGAT takes the original agat_config.yaml" + echo " shipped with AGAT." + echo " The \`--config\` option gives you the possibility to use your own AGAT" + echo " config file (located" + echo " elsewhere or named differently)." + echo "" + echo " -v, --verbose" + echo " type: boolean_true" + echo " Verbose option for debugging purpose." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/agat/agat_sp_merge_annotations/.config.vsh.yaml b/target/executable/agat/agat_sp_merge_annotations/.config.vsh.yaml index 5b8a8348..ca364083 100644 --- a/target/executable/agat/agat_sp_merge_annotations/.config.vsh.yaml +++ b/target/executable/agat/agat_sp_merge_annotations/.config.vsh.yaml @@ -81,6 +81,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -183,16 +186,16 @@ build_info: engine: "docker|native" output: "target/executable/agat/agat_sp_merge_annotations" executable: "target/executable/agat/agat_sp_merge_annotations/agat_sp_merge_annotations" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/agat/agat_sp_merge_annotations/agat_sp_merge_annotations b/target/executable/agat/agat_sp_merge_annotations/agat_sp_merge_annotations index 66313326..6cff0377 100755 --- a/target/executable/agat/agat_sp_merge_annotations/agat_sp_merge_annotations +++ b/target/executable/agat/agat_sp_merge_annotations/agat_sp_merge_annotations @@ -2,7 +2,7 @@ # agat_sp_merge_annotations main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,36 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "agat_sp_merge_annotations main" - echo "" - echo "Merge different gff annotation files into one. It uses the AGAT parser that" - echo "takes care of" - echo "duplicated names and fixes other oddities met in those files." - echo "" - echo "Inputs:" - echo " -f, --gff" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: input1.gff;input2.gff" - echo " Input GTF/GFF file(s)." - echo "" - echo "Outputs:" - echo " -o, --out, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.gff" - echo " Output gff3 file where the gene incriminated will be writen." - echo "" - echo "Arguments:" - echo " -c, --config" - echo " type: file, file must exist" - echo " example: custom_agat_config.yaml" - echo " AGAT config file. By default AGAT takes the original agat_config.yaml" - echo " shipped with AGAT." - echo " The \`--config\` option gives you the possibility to use your own AGAT" - echo " config file (located" - echo " elsewhere or named differently)." -} # initialise variables VIASH_MODE='run' @@ -482,9 +452,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t LABEL org.opencontainers.image.authors="Leïla Paquay" LABEL org.opencontainers.image.description="Companion container for running component agat agat_sp_merge_annotations" -LABEL org.opencontainers.image.created="2024-12-03T10:33:53Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:37Z" LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -599,6 +569,62 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "agat_sp_merge_annotations main" + echo "" + echo "Merge different gff annotation files into one. It uses the AGAT parser that" + echo "takes care of" + echo "duplicated names and fixes other oddities met in those files." + echo "" + echo "Inputs:" + echo " -f, --gff" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example: input1.gff;input2.gff" + echo " Input GTF/GFF file(s)." + echo "" + echo "Outputs:" + echo " -o, --out, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.gff" + echo " Output gff3 file where the gene incriminated will be writen." + echo "" + echo "Arguments:" + echo " -c, --config" + echo " type: file, file must exist" + echo " example: custom_agat_config.yaml" + echo " AGAT config file. By default AGAT takes the original agat_config.yaml" + echo " shipped with AGAT." + echo " The \`--config\` option gives you the possibility to use your own AGAT" + echo " config file (located" + echo " elsewhere or named differently)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/agat/agat_sp_statistics/.config.vsh.yaml b/target/executable/agat/agat_sp_statistics/.config.vsh.yaml index 6edc7774..eace4c83 100644 --- a/target/executable/agat/agat_sp_statistics/.config.vsh.yaml +++ b/target/executable/agat/agat_sp_statistics/.config.vsh.yaml @@ -128,6 +128,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -230,16 +233,16 @@ build_info: engine: "docker|native" output: "target/executable/agat/agat_sp_statistics" executable: "target/executable/agat/agat_sp_statistics/agat_sp_statistics" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/agat/agat_sp_statistics/agat_sp_statistics b/target/executable/agat/agat_sp_statistics/agat_sp_statistics index 7158b4c1..c9c3bea6 100755 --- a/target/executable/agat/agat_sp_statistics/agat_sp_statistics +++ b/target/executable/agat/agat_sp_statistics/agat_sp_statistics @@ -2,7 +2,7 @@ # agat_sp_statistics main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,61 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "agat_sp_statistics main" - echo "" - echo "The script provides exhaustive statistics of a gft/gff file." - echo "" - echo "If you have isoforms in your file, even if correct, some values calculated" - echo "might sounds incoherent: e.g. total length mRNA can be superior than the" - echo "genome size. Because all isoforms length is added... It is why by" - echo "default we always compute the statistics twice when there are isoforms," - echo "once with the isoforms, once without (In that case we keep the longest" - echo "isoform per locus)." - echo "" - echo "Inputs:" - echo " -i, --gff" - echo " type: file, required parameter, file must exist" - echo " example: input.gff" - echo " Input GTF/GFF file." - echo "" - echo " --gs_fasta" - echo " type: file, file must exist" - echo " example: genome.fasta" - echo " Genome size directly from a fasta file to compute more statistics." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.txt" - echo " The file where the results will be written." - echo "" - echo "Options:" - echo " -p, -d, --plot" - echo " type: boolean_true" - echo " When this option is used, an histogram of distribution of the features" - echo " will be printed in pdf files." - echo "" - echo " --gs_size" - echo " type: integer" - echo " example: 1000000" - echo " Genome size in nucleotides to compute more statistics." - echo "" - echo " -v, --verbose" - echo " type: integer" - echo " example: 1" - echo " Verbose option. To modify verbosity. Default is 1. 0 is quiet, 2 and 3" - echo " are increasing verbosity." - echo "" - echo " -c, --config" - echo " type: file, file must exist" - echo " example: custom_agat_config.yaml" - echo " AGAT config file. By default AGAT takes the original agat_config.yaml" - echo " shipped with AGAT. The \`--config\`" - echo " option gives you the possibility to use your own AGAT config file" - echo " (located elsewhere or named differently)." -} # initialise variables VIASH_MODE='run' @@ -507,9 +452,9 @@ RUN agat --version | sed 's/.*v\.//; s/\s.*//' | sed 's/^/AGAT: /' > /var/softwa LABEL org.opencontainers.image.authors="Leïla Paquay" LABEL org.opencontainers.image.description="Companion container for running component agat agat_sp_statistics" -LABEL org.opencontainers.image.created="2024-12-03T10:33:52Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:38Z" LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -624,6 +569,87 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "agat_sp_statistics main" + echo "" + echo "The script provides exhaustive statistics of a gft/gff file." + echo "" + echo "If you have isoforms in your file, even if correct, some values calculated" + echo "might sounds incoherent: e.g. total length mRNA can be superior than the" + echo "genome size. Because all isoforms length is added... It is why by" + echo "default we always compute the statistics twice when there are isoforms," + echo "once with the isoforms, once without (In that case we keep the longest" + echo "isoform per locus)." + echo "" + echo "Inputs:" + echo " -i, --gff" + echo " type: file, required parameter, file must exist" + echo " example: input.gff" + echo " Input GTF/GFF file." + echo "" + echo " --gs_fasta" + echo " type: file, file must exist" + echo " example: genome.fasta" + echo " Genome size directly from a fasta file to compute more statistics." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.txt" + echo " The file where the results will be written." + echo "" + echo "Options:" + echo " -p, -d, --plot" + echo " type: boolean_true" + echo " When this option is used, an histogram of distribution of the features" + echo " will be printed in pdf files." + echo "" + echo " --gs_size" + echo " type: integer" + echo " example: 1000000" + echo " Genome size in nucleotides to compute more statistics." + echo "" + echo " -v, --verbose" + echo " type: integer" + echo " example: 1" + echo " Verbose option. To modify verbosity. Default is 1. 0 is quiet, 2 and 3" + echo " are increasing verbosity." + echo "" + echo " -c, --config" + echo " type: file, file must exist" + echo " example: custom_agat_config.yaml" + echo " AGAT config file. By default AGAT takes the original agat_config.yaml" + echo " shipped with AGAT. The \`--config\`" + echo " option gives you the possibility to use your own AGAT config file" + echo " (located elsewhere or named differently)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/agat/agat_sq_stat_basic/.config.vsh.yaml b/target/executable/agat/agat_sq_stat_basic/.config.vsh.yaml index 9027e832..23df60d5 100644 --- a/target/executable/agat/agat_sq_stat_basic/.config.vsh.yaml +++ b/target/executable/agat/agat_sq_stat_basic/.config.vsh.yaml @@ -124,6 +124,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -226,16 +229,16 @@ build_info: engine: "docker|native" output: "target/executable/agat/agat_sq_stat_basic" executable: "target/executable/agat/agat_sq_stat_basic/agat_sq_stat_basic" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/agat/agat_sq_stat_basic/agat_sq_stat_basic b/target/executable/agat/agat_sq_stat_basic/agat_sq_stat_basic index aab33380..12b33fc9 100755 --- a/target/executable/agat/agat_sq_stat_basic/agat_sq_stat_basic +++ b/target/executable/agat/agat_sq_stat_basic/agat_sq_stat_basic @@ -2,7 +2,7 @@ # agat_sq_stat_basic main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,61 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "agat_sq_stat_basic main" - echo "" - echo "The script aims to provide basic statistics of a gtf/gff file." - echo "" - echo "Inputs:" - echo " -i, --file, --input, --gff" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: input.gff" - echo " Input GTF/GFF file." - echo "" - echo " -g, --genome_size" - echo " type: integer" - echo " example: 10000" - echo " That input is designed to know the genome size in order to calculate the" - echo " percentage of the genome represented by each kind of feature type. You" - echo " can provide an INTEGER. Or you can also pass a fasta file using the" - echo " argument --genome_size_fasta. If both are provided, only the value of" - echo " --genome_size will be considered." - echo "" - echo " --genome_size_fasta" - echo " type: file, file must exist" - echo " example: genome.fasta" - echo " That input is designed to know the genome size in order to calculate the" - echo " percentage of the genome represented by each kind of feature type. You" - echo " can provide the genome in fasta format. Or you can also pass the size" - echo " directly as an integer using the argument --genome_size. If you provide" - echo " the fasta, the genome size will be calculated on the fly. If both are" - echo " provided, only the value of --genome_size will be considered." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.txt" - echo " Output file. The result is in tabulate format." - echo "" - echo "Arguments:" - echo " --inflate" - echo " type: boolean_true" - echo " Inflate the statistics taking into account feature with" - echo " multi-parents. Indeed to avoid redundant information, some gff" - echo " factorize identical features. e.g: one exon used in two" - echo " different isoform will be defined only once, and will have" - echo " multiple parent. By default the script count such feature only" - echo " once. Using the inflate option allows to count the feature and" - echo " its size as many time there are parents." - echo "" - echo " -c, --config" - echo " type: file, file must exist" - echo " example: custom_agat_config.yaml" - echo " AGAT config file. By default AGAT takes the original agat_config.yaml" - echo " shipped with AGAT. The \`--config\` option gives you the possibility to" - echo " use your own AGAT config file (located elsewhere or named differently)." -} # initialise variables VIASH_MODE='run' @@ -507,9 +452,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t LABEL org.opencontainers.image.authors="Leïla Paquay" LABEL org.opencontainers.image.description="Companion container for running component agat agat_sq_stat_basic" -LABEL org.opencontainers.image.created="2024-12-03T10:34:05Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:39Z" LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -624,6 +569,87 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "agat_sq_stat_basic main" + echo "" + echo "The script aims to provide basic statistics of a gtf/gff file." + echo "" + echo "Inputs:" + echo " -i, --file, --input, --gff" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example: input.gff" + echo " Input GTF/GFF file." + echo "" + echo " -g, --genome_size" + echo " type: integer" + echo " example: 10000" + echo " That input is designed to know the genome size in order to calculate the" + echo " percentage of the genome represented by each kind of feature type. You" + echo " can provide an INTEGER. Or you can also pass a fasta file using the" + echo " argument --genome_size_fasta. If both are provided, only the value of" + echo " --genome_size will be considered." + echo "" + echo " --genome_size_fasta" + echo " type: file, file must exist" + echo " example: genome.fasta" + echo " That input is designed to know the genome size in order to calculate the" + echo " percentage of the genome represented by each kind of feature type. You" + echo " can provide the genome in fasta format. Or you can also pass the size" + echo " directly as an integer using the argument --genome_size. If you provide" + echo " the fasta, the genome size will be calculated on the fly. If both are" + echo " provided, only the value of --genome_size will be considered." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.txt" + echo " Output file. The result is in tabulate format." + echo "" + echo "Arguments:" + echo " --inflate" + echo " type: boolean_true" + echo " Inflate the statistics taking into account feature with" + echo " multi-parents. Indeed to avoid redundant information, some gff" + echo " factorize identical features. e.g: one exon used in two" + echo " different isoform will be defined only once, and will have" + echo " multiple parent. By default the script count such feature only" + echo " once. Using the inflate option allows to count the feature and" + echo " its size as many time there are parents." + echo "" + echo " -c, --config" + echo " type: file, file must exist" + echo " example: custom_agat_config.yaml" + echo " AGAT config file. By default AGAT takes the original agat_config.yaml" + echo " shipped with AGAT. The \`--config\` option gives you the possibility to" + echo " use your own AGAT config file (located elsewhere or named differently)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/arriba/.config.vsh.yaml b/target/executable/arriba/.config.vsh.yaml index 5b9cba5e..3d4e2c8e 100644 --- a/target/executable/arriba/.config.vsh.yaml +++ b/target/executable/arriba/.config.vsh.yaml @@ -603,6 +603,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: cpus: 1 commands: @@ -705,16 +708,16 @@ build_info: engine: "docker|native" output: "target/executable/arriba" executable: "target/executable/arriba/arriba" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/arriba/arriba b/target/executable/arriba/arriba index 61780ae3..533e7052 100755 --- a/target/executable/arriba/arriba +++ b/target/executable/arriba/arriba @@ -2,7 +2,7 @@ # arriba main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,6 +172,404 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM quay.io/biocontainers/arriba:2.4.0--h0033a41_2 +ENTRYPOINT [] +RUN arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s\(.*\)/arriba: "\1"/' > /var/software_versions.txt + +LABEL org.opencontainers.image.authors="Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component arriba" +LABEL org.opencontainers.image.created="2025-03-06T09:19:44Z" +LABEL org.opencontainers.image.source="https://github.com/suhrig/arriba" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "arriba main" @@ -473,404 +871,32 @@ function ViashHelp { echo " is incomplete" echo " (denoted as '...'), fill the gaps using the assembly sequence wherever" echo " possible." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? : whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM quay.io/biocontainers/arriba:2.4.0--h0033a41_2 -ENTRYPOINT [] -RUN arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s\(.*\)/arriba: "\1"/' > /var/software_versions.txt - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component arriba" -LABEL org.opencontainers.image.created="2024-12-03T10:33:55Z" -LABEL org.opencontainers.image.source="https://github.com/suhrig/arriba" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bases2fastq/.config.vsh.yaml b/target/executable/bases2fastq/.config.vsh.yaml index 1c5f7e08..14c3d4bb 100644 --- a/target/executable/bases2fastq/.config.vsh.yaml +++ b/target/executable/bases2fastq/.config.vsh.yaml @@ -285,6 +285,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -393,16 +396,16 @@ build_info: engine: "docker|native" output: "target/executable/bases2fastq" executable: "target/executable/bases2fastq/bases2fastq" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bases2fastq/bases2fastq b/target/executable/bases2fastq/bases2fastq index d1fcc9c2..59b5e102 100755 --- a/target/executable/bases2fastq/bases2fastq +++ b/target/executable/bases2fastq/bases2fastq @@ -2,7 +2,7 @@ # bases2fastq main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,154 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bases2fastq main" - echo "" - echo "Bases2Fastq demultiplexes sequencing data generated by Element Biosciences" - echo "instruments and converts base calls into FASTQ files." - echo "" - echo "Input:" - echo " --analysis_directory" - echo " type: file, required parameter, file must exist" - echo " example: input" - echo " Location of analysis directory" - echo "" - echo " -r, --run_manifest" - echo " type: file, file must exist" - echo " Location of run manifest to use instead of default RunManifest.csv found" - echo " in analysis directory" - echo "" - echo "Output:" - echo " -o, --output_directory" - echo " type: file, required parameter, output, file must exist" - echo " example: fastq_dir" - echo " Location to save output fastqs" - echo "" - echo " --report" - echo " type: file, output, file must exist" - echo " Output location for the HTML report" - echo "" - echo " --logs" - echo " type: file, output, file must exist" - echo " example: logs_dir" - echo " Directory containing log files" - echo "" - echo "Arguments:" - echo " --chemistry_version" - echo " type: string" - echo " Run parameters override, chemistry version." - echo "" - echo " -d, --demux_only" - echo " type: boolean_true" - echo " Generate demux files and indexing stats without generating FASTQ" - echo "" - echo " --detect_adapters" - echo " type: boolean_true" - echo " Detect adapters sequences, overriding any sequences present in run" - echo " manifest." - echo "" - echo " --error_on_missing" - echo " type: boolean_true" - echo " Terminate execution for a missing file (by default, missing files are" - echo " skipped and execution continues). Also set by --strict." - echo "" - echo " -e, --exclude_tile" - echo " type: string, multiple values allowed" - echo " Regex matching tile names to exclude. This flag can be specified" - echo " multiple times. (e.g. L1.*C0[23]S.)" - echo "" - echo " --filter_mask" - echo " type: string" - echo " Run parameters override, custom pass filter mask." - echo "" - echo " --flowcell_id" - echo " type: string" - echo " Run parameters override, flowcell ID." - echo "" - echo " --force_index_orientation" - echo " type: boolean_true" - echo " Do not attempt to find orientation for I1/I2 reads (reverse complement)." - echo " Use orientation given in run manifest." - echo "" - echo " --group_fastq" - echo " type: boolean_true" - echo " Group all FASTQ/stats/metrics for a project are in the project folder." - echo "" - echo " --i1_cycles" - echo " type: integer" - echo " min: 1" - echo " Run parameters override, I1 cycles." - echo "" - echo " --i2_cycles" - echo " type: integer" - echo " min: 1" - echo " Run parameters override, I2 cycles" - echo "" - echo " -i, --include_tile" - echo " type: string, multiple values allowed" - echo " Regex matching tile names to include. This flag" - echo " can be specified multiple times. (e.g. L1.*C0[23]S.)" - echo "" - echo " --kit_configuration" - echo " type: string" - echo " Run parameters override, kit configuration." - echo "" - echo " --legacy_fastq" - echo " type: boolean_true" - echo " Legacy naming for FASTQ files (e.g. SampleName_S1_L001_R1_001.fastq.gz)" - echo "" - echo " -l, --log_level" - echo " type: string" - echo " example: INFO" - echo " choices: [ DEBUG, INFO, WARNING, ERROR ]" - echo " Severity level for logging." - echo "" - echo " --no_error_on_invalid" - echo " type: boolean_true" - echo " Skip invalid files and continue execution. Overridden by --strict" - echo " options" - echo "" - echo " --no_projects" - echo " type: boolean_true" - echo " Disable project directories" - echo "" - echo " --num_unassigned" - echo " type: integer" - echo " example: 30" - echo " min: 0" - echo " max: 1000" - echo " Max Number of unassigned sequences to report." - echo "" - echo " --preparation_workflow" - echo " type: string" - echo " Run parameters override, preparation workflow." - echo "" - echo " --qc_only" - echo " type: boolean_true" - echo " Quickly generate run stats for single tile without generating FASTQ." - echo " Use --include_tile/--exclude_tile to define custom tile set." - echo "" - echo " --r1_cycles" - echo " type: integer" - echo " min: 1" - echo " Run parameters override, R1 cycles." - echo "" - echo " --r2_cycles" - echo " type: integer" - echo " min: 1" - echo " Run parameters override, R2 cycles." - echo "" - echo " --split_lanes" - echo " type: boolean_true" - echo " Split FASTQ files by lane." - echo "" - echo " --strict" - echo " type: boolean_true" - echo " In strict mode any invalid or missing input file will terminate" - echo " execution" - echo " (overrides no_error_on_invalid and sets --error_on_missing)" -} # initialise variables VIASH_MODE='run' @@ -604,9 +456,9 @@ RUN echo "bases2fastq: $(bases2fastq --version | cut -d' ' -f3)" > /var/software LABEL org.opencontainers.image.authors="Dries Schaumont" LABEL org.opencontainers.image.description="Companion container for running component bases2fastq" -LABEL org.opencontainers.image.created="2024-12-03T10:34:06Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:42Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/biobox" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -721,6 +573,180 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bases2fastq main" + echo "" + echo "Bases2Fastq demultiplexes sequencing data generated by Element Biosciences" + echo "instruments and converts base calls into FASTQ files." + echo "" + echo "Input:" + echo " --analysis_directory" + echo " type: file, required parameter, file must exist" + echo " example: input" + echo " Location of analysis directory" + echo "" + echo " -r, --run_manifest" + echo " type: file, file must exist" + echo " Location of run manifest to use instead of default RunManifest.csv found" + echo " in analysis directory" + echo "" + echo "Output:" + echo " -o, --output_directory" + echo " type: file, required parameter, output, file must exist" + echo " example: fastq_dir" + echo " Location to save output fastqs" + echo "" + echo " --report" + echo " type: file, output, file must exist" + echo " Output location for the HTML report" + echo "" + echo " --logs" + echo " type: file, output, file must exist" + echo " example: logs_dir" + echo " Directory containing log files" + echo "" + echo "Arguments:" + echo " --chemistry_version" + echo " type: string" + echo " Run parameters override, chemistry version." + echo "" + echo " -d, --demux_only" + echo " type: boolean_true" + echo " Generate demux files and indexing stats without generating FASTQ" + echo "" + echo " --detect_adapters" + echo " type: boolean_true" + echo " Detect adapters sequences, overriding any sequences present in run" + echo " manifest." + echo "" + echo " --error_on_missing" + echo " type: boolean_true" + echo " Terminate execution for a missing file (by default, missing files are" + echo " skipped and execution continues). Also set by --strict." + echo "" + echo " -e, --exclude_tile" + echo " type: string, multiple values allowed" + echo " Regex matching tile names to exclude. This flag can be specified" + echo " multiple times. (e.g. L1.*C0[23]S.)" + echo "" + echo " --filter_mask" + echo " type: string" + echo " Run parameters override, custom pass filter mask." + echo "" + echo " --flowcell_id" + echo " type: string" + echo " Run parameters override, flowcell ID." + echo "" + echo " --force_index_orientation" + echo " type: boolean_true" + echo " Do not attempt to find orientation for I1/I2 reads (reverse complement)." + echo " Use orientation given in run manifest." + echo "" + echo " --group_fastq" + echo " type: boolean_true" + echo " Group all FASTQ/stats/metrics for a project are in the project folder." + echo "" + echo " --i1_cycles" + echo " type: integer" + echo " min: 1" + echo " Run parameters override, I1 cycles." + echo "" + echo " --i2_cycles" + echo " type: integer" + echo " min: 1" + echo " Run parameters override, I2 cycles" + echo "" + echo " -i, --include_tile" + echo " type: string, multiple values allowed" + echo " Regex matching tile names to include. This flag" + echo " can be specified multiple times. (e.g. L1.*C0[23]S.)" + echo "" + echo " --kit_configuration" + echo " type: string" + echo " Run parameters override, kit configuration." + echo "" + echo " --legacy_fastq" + echo " type: boolean_true" + echo " Legacy naming for FASTQ files (e.g. SampleName_S1_L001_R1_001.fastq.gz)" + echo "" + echo " -l, --log_level" + echo " type: string" + echo " example: INFO" + echo " choices: [ DEBUG, INFO, WARNING, ERROR ]" + echo " Severity level for logging." + echo "" + echo " --no_error_on_invalid" + echo " type: boolean_true" + echo " Skip invalid files and continue execution. Overridden by --strict" + echo " options" + echo "" + echo " --no_projects" + echo " type: boolean_true" + echo " Disable project directories" + echo "" + echo " --num_unassigned" + echo " type: integer" + echo " example: 30" + echo " min: 0" + echo " max: 1000" + echo " Max Number of unassigned sequences to report." + echo "" + echo " --preparation_workflow" + echo " type: string" + echo " Run parameters override, preparation workflow." + echo "" + echo " --qc_only" + echo " type: boolean_true" + echo " Quickly generate run stats for single tile without generating FASTQ." + echo " Use --include_tile/--exclude_tile to define custom tile set." + echo "" + echo " --r1_cycles" + echo " type: integer" + echo " min: 1" + echo " Run parameters override, R1 cycles." + echo "" + echo " --r2_cycles" + echo " type: integer" + echo " min: 1" + echo " Run parameters override, R2 cycles." + echo "" + echo " --split_lanes" + echo " type: boolean_true" + echo " Split FASTQ files by lane." + echo "" + echo " --strict" + echo " type: boolean_true" + echo " In strict mode any invalid or missing input file will terminate" + echo " execution" + echo " (overrides no_error_on_invalid and sets --error_on_missing)" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bbmap/bbmap_bbsplit/.config.vsh.yaml b/target/executable/bbmap/bbmap_bbsplit/.config.vsh.yaml index 541818f4..d73c1076 100644 --- a/target/executable/bbmap/bbmap_bbsplit/.config.vsh.yaml +++ b/target/executable/bbmap/bbmap_bbsplit/.config.vsh.yaml @@ -267,6 +267,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -367,16 +370,16 @@ build_info: engine: "docker|native" output: "target/executable/bbmap/bbmap_bbsplit" executable: "target/executable/bbmap/bbmap_bbsplit/bbmap_bbsplit" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bbmap/bbmap_bbsplit/bbmap_bbsplit b/target/executable/bbmap/bbmap_bbsplit/bbmap_bbsplit index 5e631179..34429cf2 100755 --- a/target/executable/bbmap/bbmap_bbsplit/bbmap_bbsplit +++ b/target/executable/bbmap/bbmap_bbsplit/bbmap_bbsplit @@ -2,7 +2,7 @@ # bbmap_bbsplit main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,152 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bbmap_bbsplit main" - echo "" - echo "Split sequencing reads by mapping them to multiple references simultaneously." - echo "" - echo "Input:" - echo " --id" - echo " type: string" - echo " Sample ID" - echo "" - echo " --paired" - echo " type: boolean_true" - echo " Paired fastq files or not?" - echo "" - echo " --input" - echo " type: file, multiple values allowed, file must exist" - echo " example: reads.fastq" - echo " Input fastq files, either one or two (paired), separated by \";\"." - echo "" - echo " --ref" - echo " type: file, multiple values allowed, file must exist" - echo " Reference FASTA files, separated by \";\". The primary reference should be" - echo " specified first." - echo "" - echo " --only_build_index" - echo " type: boolean_true" - echo " If set, only builds the index. Otherwise, mapping is performed." - echo "" - echo " --build" - echo " type: file, file must exist" - echo " Index to be used for mapping." - echo "" - echo " --qin" - echo " type: string" - echo " Set to 33 or 64 to specify input quality value ASCII offset." - echo " Automatically detected if" - echo " not specified." - echo "" - echo " --interleaved" - echo " type: boolean_true" - echo " True forces paired/interleaved input; false forces single-ended mapping." - echo " If not specified, interleaved status will be autodetected from read" - echo " names." - echo "" - echo " --maxindel" - echo " type: integer" - echo " example: 20" - echo " Don't look for indels longer than this. Lower is faster. Set to >=100k" - echo " for RNA-seq." - echo "" - echo " --minratio" - echo " type: double" - echo " example: 0.56" - echo " Fraction of max alignment score required to keep a site. Higher is" - echo " faster." - echo "" - echo " --minhits" - echo " type: integer" - echo " example: 1" - echo " Minimum number of seed hits required for candidate sites. Higher is" - echo " faster." - echo "" - echo " --ambiguous" - echo " type: string" - echo " example: best" - echo " choices: [ best, toss, random, all ]" - echo " Set behavior on ambiguously-mapped reads (with multiple top-scoring" - echo " mapping locations)." - echo " * best Use the first best site (Default)" - echo " * toss Consider unmapped" - echo " * random Select one top-scoring site randomly" - echo " * all Retain all top-scoring sites. Does not work yet with SAM" - echo " output" - echo "" - echo " --ambiguous2" - echo " type: string" - echo " example: best" - echo " choices: [ best, toss, all, split ]" - echo " Set behavior only for reads that map ambiguously to multiple different" - echo " references." - echo " Normal 'ambiguous=' controls behavior on all ambiguous reads;" - echo " Ambiguous2 excludes reads that map ambiguously within a single" - echo " reference." - echo " * best Use the first best site (Default)" - echo " * toss Consider unmapped" - echo " * all Write a copy to the output for each reference to which it" - echo " maps" - echo " * split Write a copy to the AMBIGUOUS_ output for each reference to" - echo " which it maps" - echo "" - echo " --qtrim" - echo " type: string" - echo " choices: [ l, r, lr ]" - echo " Quality-trim ends to Q5 before mapping. Options are 'l' (left), 'r'" - echo " (right), and 'lr' (both)." - echo "" - echo " --untrim" - echo " type: boolean_true" - echo " Undo trimming after mapping. Untrimmed bases will be soft-clipped in" - echo " cigar strings." - echo "" - echo "Output:" - echo " --index" - echo " type: file, output, file must exist" - echo " example: BBSplit_index" - echo " Location to write the index." - echo "" - echo " --fastq_1" - echo " type: file, output, file must exist" - echo " example: read_out1.fastq" - echo " Output file for read 1." - echo "" - echo " --fastq_2" - echo " type: file, output, file must exist" - echo " example: read_out2.fastq" - echo " Output file for read 2." - echo "" - echo " --bs, --sam2bam" - echo " type: file, output, file must exist" - echo " example: script.sh" - echo " Write a shell script to 'file' that will turn the sam output into a" - echo " sorted, indexed bam file." - echo "" - echo " --scafstats" - echo " type: file, output, file must exist" - echo " example: scaffold_stats.txt" - echo " Write statistics on how many reads mapped to which scaffold to this" - echo " file." - echo "" - echo " --refstats" - echo " type: file, output, file must exist" - echo " example: reference_stats.txt" - echo " Write statistics on how many reads were assigned to which reference to" - echo " this file." - echo " Unmapped reads whose mate mapped to a reference are considered assigned" - echo " and will be counted." - echo "" - echo " --nzo" - echo " type: boolean_true" - echo " Only print lines with nonzero coverage." - echo "" - echo " --bbmap_args" - echo " type: string" - echo " Additional arguments from BBMap to pass to BBSplit." -} # initialise variables VIASH_MODE='run' @@ -600,9 +454,9 @@ cp -r bbmap/* /usr/local/bin RUN bbsplit.sh --version 2>&1 | awk '/BBMap version/{print "BBMAP:", $NF}' > /var/software_versions.txt LABEL org.opencontainers.image.description="Companion container for running component bbmap bbmap_bbsplit" -LABEL org.opencontainers.image.created="2024-12-03T10:33:54Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:45Z" LABEL org.opencontainers.image.source="https://github.com/BioInfoTools/BBMap/blob/master/sh/bbsplit.sh" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -717,6 +571,178 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bbmap_bbsplit main" + echo "" + echo "Split sequencing reads by mapping them to multiple references simultaneously." + echo "" + echo "Input:" + echo " --id" + echo " type: string" + echo " Sample ID" + echo "" + echo " --paired" + echo " type: boolean_true" + echo " Paired fastq files or not?" + echo "" + echo " --input" + echo " type: file, multiple values allowed, file must exist" + echo " example: reads.fastq" + echo " Input fastq files, either one or two (paired), separated by \";\"." + echo "" + echo " --ref" + echo " type: file, multiple values allowed, file must exist" + echo " Reference FASTA files, separated by \";\". The primary reference should be" + echo " specified first." + echo "" + echo " --only_build_index" + echo " type: boolean_true" + echo " If set, only builds the index. Otherwise, mapping is performed." + echo "" + echo " --build" + echo " type: file, file must exist" + echo " Index to be used for mapping." + echo "" + echo " --qin" + echo " type: string" + echo " Set to 33 or 64 to specify input quality value ASCII offset." + echo " Automatically detected if" + echo " not specified." + echo "" + echo " --interleaved" + echo " type: boolean_true" + echo " True forces paired/interleaved input; false forces single-ended mapping." + echo " If not specified, interleaved status will be autodetected from read" + echo " names." + echo "" + echo " --maxindel" + echo " type: integer" + echo " example: 20" + echo " Don't look for indels longer than this. Lower is faster. Set to >=100k" + echo " for RNA-seq." + echo "" + echo " --minratio" + echo " type: double" + echo " example: 0.56" + echo " Fraction of max alignment score required to keep a site. Higher is" + echo " faster." + echo "" + echo " --minhits" + echo " type: integer" + echo " example: 1" + echo " Minimum number of seed hits required for candidate sites. Higher is" + echo " faster." + echo "" + echo " --ambiguous" + echo " type: string" + echo " example: best" + echo " choices: [ best, toss, random, all ]" + echo " Set behavior on ambiguously-mapped reads (with multiple top-scoring" + echo " mapping locations)." + echo " * best Use the first best site (Default)" + echo " * toss Consider unmapped" + echo " * random Select one top-scoring site randomly" + echo " * all Retain all top-scoring sites. Does not work yet with SAM" + echo " output" + echo "" + echo " --ambiguous2" + echo " type: string" + echo " example: best" + echo " choices: [ best, toss, all, split ]" + echo " Set behavior only for reads that map ambiguously to multiple different" + echo " references." + echo " Normal 'ambiguous=' controls behavior on all ambiguous reads;" + echo " Ambiguous2 excludes reads that map ambiguously within a single" + echo " reference." + echo " * best Use the first best site (Default)" + echo " * toss Consider unmapped" + echo " * all Write a copy to the output for each reference to which it" + echo " maps" + echo " * split Write a copy to the AMBIGUOUS_ output for each reference to" + echo " which it maps" + echo "" + echo " --qtrim" + echo " type: string" + echo " choices: [ l, r, lr ]" + echo " Quality-trim ends to Q5 before mapping. Options are 'l' (left), 'r'" + echo " (right), and 'lr' (both)." + echo "" + echo " --untrim" + echo " type: boolean_true" + echo " Undo trimming after mapping. Untrimmed bases will be soft-clipped in" + echo " cigar strings." + echo "" + echo "Output:" + echo " --index" + echo " type: file, output, file must exist" + echo " example: BBSplit_index" + echo " Location to write the index." + echo "" + echo " --fastq_1" + echo " type: file, output, file must exist" + echo " example: read_out1.fastq" + echo " Output file for read 1." + echo "" + echo " --fastq_2" + echo " type: file, output, file must exist" + echo " example: read_out2.fastq" + echo " Output file for read 2." + echo "" + echo " --bs, --sam2bam" + echo " type: file, output, file must exist" + echo " example: script.sh" + echo " Write a shell script to 'file' that will turn the sam output into a" + echo " sorted, indexed bam file." + echo "" + echo " --scafstats" + echo " type: file, output, file must exist" + echo " example: scaffold_stats.txt" + echo " Write statistics on how many reads mapped to which scaffold to this" + echo " file." + echo "" + echo " --refstats" + echo " type: file, output, file must exist" + echo " example: reference_stats.txt" + echo " Write statistics on how many reads were assigned to which reference to" + echo " this file." + echo " Unmapped reads whose mate mapped to a reference are considered assigned" + echo " and will be counted." + echo "" + echo " --nzo" + echo " type: boolean_true" + echo " Only print lines with nonzero coverage." + echo "" + echo " --bbmap_args" + echo " type: string" + echo " Additional arguments from BBMap to pass to BBSplit." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bcftools/bcftools_annotate/.config.vsh.yaml b/target/executable/bcftools/bcftools_annotate/.config.vsh.yaml index 4cfbe259..95410503 100644 --- a/target/executable/bcftools/bcftools_annotate/.config.vsh.yaml +++ b/target/executable/bcftools/bcftools_annotate/.config.vsh.yaml @@ -355,6 +355,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -468,16 +471,16 @@ build_info: engine: "docker|native" output: "target/executable/bcftools/bcftools_annotate" executable: "target/executable/bcftools/bcftools_annotate/bcftools_annotate" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bcftools/bcftools_annotate/bcftools_annotate b/target/executable/bcftools/bcftools_annotate/bcftools_annotate index 09cdffc7..22fff05b 100755 --- a/target/executable/bcftools/bcftools_annotate/bcftools_annotate +++ b/target/executable/bcftools/bcftools_annotate/bcftools_annotate @@ -2,7 +2,7 @@ # bcftools_annotate main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,200 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bcftools_annotate main" - echo "" - echo "Add or remove annotations from a VCF/BCF file." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " Input VCF/BCF file." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output annotated file." - echo "" - echo "Options:" - echo " For examples on how to use use bcftools annotate see" - echo " http://samtools.github.io/bcftools/howtos/annotate.html." - echo " For more details on the options see" - echo " https://samtools.github.io/bcftools/bcftools.html#annotate." - echo "" - echo " --a, --annotations" - echo " type: file, file must exist" - echo " VCF file or tabix-indexed FILE with annotations: CHR\\tPOS[\\tVALUE]+ ." - echo "" - echo " --c, --columns" - echo " type: string" - echo " List of columns in the annotation file, e.g." - echo " CHROM,POS,REF,ALT,-,INFO/TAG." - echo " See man page for details." - echo "" - echo " --C, --columns_file" - echo " type: file, file must exist" - echo " Read -c columns from FILE, one name per row, with optional --merge_logic" - echo " TYPE: NAME[ TYPE]." - echo "" - echo " --e, --exclude" - echo " type: string" - echo " example: QUAL >= 30 && DP >= 10" - echo " Exclude sites for which the expression is true." - echo " See https://samtools.github.io/bcftools/bcftools.html#expressions for" - echo " details." - echo "" - echo " --force" - echo " type: boolean_true" - echo " continue even when parsing errors, such as undefined tags, are" - echo " encountered." - echo " Note this can be an unsafe operation and can result in corrupted BCF" - echo " files." - echo " If this option is used, make sure to sanity check the result thoroughly." - echo "" - echo " --H, --header_line" - echo " type: string" - echo " Header line which should be appended to the VCF header, can be given" - echo " multiple times." - echo "" - echo " --h, --header_lines" - echo " type: file, file must exist" - echo " File with header lines to append to the VCF header." - echo " For example:" - echo " ##INFO=" - echo " ##INFO=" - echo "" - echo " --I, --set_id" - echo " type: string" - echo " Set ID column using a \`bcftools query\`-like expression, see man page for" - echo " details." - echo "" - echo " --include" - echo " type: string" - echo " example: QUAL >= 30 && DP >= 10" - echo " Select sites for which the expression is true." - echo " See https://samtools.github.io/bcftools/bcftools.html#expressions for" - echo " details." - echo "" - echo " --k, --keep_sites" - echo " type: boolean_true" - echo " Leave --include/--exclude sites unchanged instead of discarding them." - echo "" - echo " --l, --merge_logic" - echo " type: string" - echo " When multiple regions overlap a single record, this option defines how" - echo " to treat multiple annotation values." - echo " See man page for more details." - echo "" - echo " --m, --mark_sites" - echo " type: string" - echo " Annotate sites which are present (\"+\") or absent (\"-\") in the -a file" - echo " with a new INFO/TAG flag." - echo "" - echo " --min_overlap" - echo " type: string" - echo " Minimum overlap required as a fraction of the variant in the annotation" - echo " -a file (ANN)," - echo " in the target VCF file (:VCF), or both for reciprocal overlap (ANN:VCF)." - echo " By default overlaps of arbitrary length are sufficient." - echo " The option can be used only with the tab-delimited annotation -a file" - echo " and with BEG and END columns present." - echo "" - echo " --no_version" - echo " type: boolean_true" - echo " Do not append version and command line information to the output VCF" - echo " header." - echo "" - echo " --O, --output_type" - echo " type: string" - echo " choices: [ u, z, b, v ]" - echo " Output type:" - echo " u: uncompressed BCF" - echo " z: compressed VCF" - echo " b: compressed BCF" - echo " v: uncompressed VCF" - echo "" - echo " --pair_logic" - echo " type: string" - echo " choices: [ snps, indels, both, all, some, exact ]" - echo " Controls how to match records from the annotation file to the target" - echo " VCF." - echo " Effective only when -a is a VCF or BCF file." - echo " The option replaces the former uninuitive --collapse." - echo " See Common Options for more." - echo "" - echo " --r, --regions" - echo " type: string" - echo " example: 20:1000000-2000000" - echo " Restrict to comma-separated list of regions." - echo " Following formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…​]." - echo "" - echo " --R, --regions_file" - echo " type: file, file must exist" - echo " Restrict to regions listed in a file." - echo " Regions can be specified either on a VCF, BED, or tab-delimited file" - echo " (the default)." - echo " For more information check manual." - echo "" - echo " --regions_overlap" - echo " type: string" - echo " choices: [ pos, record, variant, 0, 1, 2 ]" - echo " This option controls how overlapping records are determined:" - echo " set to 'pos' or '0' if the VCF record has to have POS inside a region" - echo " (this corresponds to the default behavior of -t/-T);" - echo " set to 'record' or '1' if also overlapping records with POS outside a" - echo " region should be included (this is the default behavior of -r/-R," - echo " and includes indels with POS at the end of a region, which are" - echo " technically outside the region);" - echo " or set to 'variant' or '2' to include only true overlapping variation" - echo " (compare the full VCF representation \"TA>T-\" vs the true sequence" - echo " variation \"A>-\")." - echo "" - echo " --rename_annotations" - echo " type: file, file must exist" - echo " Rename annotations: TYPE/old\\tnew, where TYPE is one of" - echo " FILTER,INFO,FORMAT." - echo "" - echo " --rename_chromosomes" - echo " type: file, file must exist" - echo " Rename chromosomes according to the map in file, with \"old_name" - echo " new_name\\n\" pairs" - echo " separated by whitespaces, each on a separate line." - echo "" - echo " --samples" - echo " type: string" - echo " Subset of samples to annotate." - echo " See also" - echo " https://samtools.github.io/bcftools/bcftools.html#common_options." - echo "" - echo " --samples_file" - echo " type: file, file must exist" - echo " Subset of samples to annotate in file format." - echo " See also" - echo " https://samtools.github.io/bcftools/bcftools.html#common_options." - echo "" - echo " --single_overlaps" - echo " type: boolean_true" - echo " Use this option to keep memory requirements low with very large" - echo " annotation files." - echo " Note, however, that this comes at a cost, only single overlapping" - echo " intervals are considered in this mode." - echo " This was the default mode until the commit af6f0c9 (Feb 24 2019)." - echo "" - echo " --x, --remove" - echo " type: string" - echo " List of annotations to remove." - echo " Use \"FILTER\" to remove all filters or \"FILTER/SomeFilter\" to remove a" - echo " specific filter." - echo " Similarly, \"INFO\" can be used to remove all INFO tags and \"FORMAT\" to" - echo " remove all FORMAT tags except GT." - echo " To remove all INFO tags except \"FOO\" and \"BAR\", use \"^INFO/FOO,INFO/BAR\"" - echo " (and similarly for FORMAT and FILTER)." - echo " \"INFO\" can be abbreviated to \"INF\" and \"FORMAT\" to \"FMT\"." -} # initialise variables VIASH_MODE='run' @@ -650,9 +456,9 @@ RUN echo "bcftools: \"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftoo LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bcftools bcftools_annotate" -LABEL org.opencontainers.image.created="2024-12-03T10:34:03Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:35Z" LABEL org.opencontainers.image.source="https://github.com/samtools/bcftools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -767,6 +573,226 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bcftools_annotate main" + echo "" + echo "Add or remove annotations from a VCF/BCF file." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " Input VCF/BCF file." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " Output annotated file." + echo "" + echo "Options:" + echo " For examples on how to use use bcftools annotate see" + echo " http://samtools.github.io/bcftools/howtos/annotate.html." + echo " For more details on the options see" + echo " https://samtools.github.io/bcftools/bcftools.html#annotate." + echo "" + echo " --a, --annotations" + echo " type: file, file must exist" + echo " VCF file or tabix-indexed FILE with annotations: CHR\\tPOS[\\tVALUE]+ ." + echo "" + echo " --c, --columns" + echo " type: string" + echo " List of columns in the annotation file, e.g." + echo " CHROM,POS,REF,ALT,-,INFO/TAG." + echo " See man page for details." + echo "" + echo " --C, --columns_file" + echo " type: file, file must exist" + echo " Read -c columns from FILE, one name per row, with optional --merge_logic" + echo " TYPE: NAME[ TYPE]." + echo "" + echo " --e, --exclude" + echo " type: string" + echo " example: QUAL >= 30 && DP >= 10" + echo " Exclude sites for which the expression is true." + echo " See https://samtools.github.io/bcftools/bcftools.html#expressions for" + echo " details." + echo "" + echo " --force" + echo " type: boolean_true" + echo " continue even when parsing errors, such as undefined tags, are" + echo " encountered." + echo " Note this can be an unsafe operation and can result in corrupted BCF" + echo " files." + echo " If this option is used, make sure to sanity check the result thoroughly." + echo "" + echo " --H, --header_line" + echo " type: string" + echo " Header line which should be appended to the VCF header, can be given" + echo " multiple times." + echo "" + echo " --h, --header_lines" + echo " type: file, file must exist" + echo " File with header lines to append to the VCF header." + echo " For example:" + echo " ##INFO=" + echo " ##INFO=" + echo "" + echo " --I, --set_id" + echo " type: string" + echo " Set ID column using a \`bcftools query\`-like expression, see man page for" + echo " details." + echo "" + echo " --include" + echo " type: string" + echo " example: QUAL >= 30 && DP >= 10" + echo " Select sites for which the expression is true." + echo " See https://samtools.github.io/bcftools/bcftools.html#expressions for" + echo " details." + echo "" + echo " --k, --keep_sites" + echo " type: boolean_true" + echo " Leave --include/--exclude sites unchanged instead of discarding them." + echo "" + echo " --l, --merge_logic" + echo " type: string" + echo " When multiple regions overlap a single record, this option defines how" + echo " to treat multiple annotation values." + echo " See man page for more details." + echo "" + echo " --m, --mark_sites" + echo " type: string" + echo " Annotate sites which are present (\"+\") or absent (\"-\") in the -a file" + echo " with a new INFO/TAG flag." + echo "" + echo " --min_overlap" + echo " type: string" + echo " Minimum overlap required as a fraction of the variant in the annotation" + echo " -a file (ANN)," + echo " in the target VCF file (:VCF), or both for reciprocal overlap (ANN:VCF)." + echo " By default overlaps of arbitrary length are sufficient." + echo " The option can be used only with the tab-delimited annotation -a file" + echo " and with BEG and END columns present." + echo "" + echo " --no_version" + echo " type: boolean_true" + echo " Do not append version and command line information to the output VCF" + echo " header." + echo "" + echo " --O, --output_type" + echo " type: string" + echo " choices: [ u, z, b, v ]" + echo " Output type:" + echo " u: uncompressed BCF" + echo " z: compressed VCF" + echo " b: compressed BCF" + echo " v: uncompressed VCF" + echo "" + echo " --pair_logic" + echo " type: string" + echo " choices: [ snps, indels, both, all, some, exact ]" + echo " Controls how to match records from the annotation file to the target" + echo " VCF." + echo " Effective only when -a is a VCF or BCF file." + echo " The option replaces the former uninuitive --collapse." + echo " See Common Options for more." + echo "" + echo " --r, --regions" + echo " type: string" + echo " example: 20:1000000-2000000" + echo " Restrict to comma-separated list of regions." + echo " Following formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…​]." + echo "" + echo " --R, --regions_file" + echo " type: file, file must exist" + echo " Restrict to regions listed in a file." + echo " Regions can be specified either on a VCF, BED, or tab-delimited file" + echo " (the default)." + echo " For more information check manual." + echo "" + echo " --regions_overlap" + echo " type: string" + echo " choices: [ pos, record, variant, 0, 1, 2 ]" + echo " This option controls how overlapping records are determined:" + echo " set to 'pos' or '0' if the VCF record has to have POS inside a region" + echo " (this corresponds to the default behavior of -t/-T);" + echo " set to 'record' or '1' if also overlapping records with POS outside a" + echo " region should be included (this is the default behavior of -r/-R," + echo " and includes indels with POS at the end of a region, which are" + echo " technically outside the region);" + echo " or set to 'variant' or '2' to include only true overlapping variation" + echo " (compare the full VCF representation \"TA>T-\" vs the true sequence" + echo " variation \"A>-\")." + echo "" + echo " --rename_annotations" + echo " type: file, file must exist" + echo " Rename annotations: TYPE/old\\tnew, where TYPE is one of" + echo " FILTER,INFO,FORMAT." + echo "" + echo " --rename_chromosomes" + echo " type: file, file must exist" + echo " Rename chromosomes according to the map in file, with \"old_name" + echo " new_name\\n\" pairs" + echo " separated by whitespaces, each on a separate line." + echo "" + echo " --samples" + echo " type: string" + echo " Subset of samples to annotate." + echo " See also" + echo " https://samtools.github.io/bcftools/bcftools.html#common_options." + echo "" + echo " --samples_file" + echo " type: file, file must exist" + echo " Subset of samples to annotate in file format." + echo " See also" + echo " https://samtools.github.io/bcftools/bcftools.html#common_options." + echo "" + echo " --single_overlaps" + echo " type: boolean_true" + echo " Use this option to keep memory requirements low with very large" + echo " annotation files." + echo " Note, however, that this comes at a cost, only single overlapping" + echo " intervals are considered in this mode." + echo " This was the default mode until the commit af6f0c9 (Feb 24 2019)." + echo "" + echo " --x, --remove" + echo " type: string" + echo " List of annotations to remove." + echo " Use \"FILTER\" to remove all filters or \"FILTER/SomeFilter\" to remove a" + echo " specific filter." + echo " Similarly, \"INFO\" can be used to remove all INFO tags and \"FORMAT\" to" + echo " remove all FORMAT tags except GT." + echo " To remove all INFO tags except \"FOO\" and \"BAR\", use \"^INFO/FOO,INFO/BAR\"" + echo " (and similarly for FORMAT and FILTER)." + echo " \"INFO\" can be abbreviated to \"INF\" and \"FORMAT\" to \"FMT\"." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bcftools/bcftools_concat/.config.vsh.yaml b/target/executable/bcftools/bcftools_concat/.config.vsh.yaml index 84657444..f2b99752 100644 --- a/target/executable/bcftools/bcftools_concat/.config.vsh.yaml +++ b/target/executable/bcftools/bcftools_concat/.config.vsh.yaml @@ -221,6 +221,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -334,16 +337,16 @@ build_info: engine: "docker|native" output: "target/executable/bcftools/bcftools_concat" executable: "target/executable/bcftools/bcftools_concat/bcftools_concat" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bcftools/bcftools_concat/bcftools_concat b/target/executable/bcftools/bcftools_concat/bcftools_concat index f17af7f3..6ffb3bd7 100755 --- a/target/executable/bcftools/bcftools_concat/bcftools_concat +++ b/target/executable/bcftools/bcftools_concat/bcftools_concat @@ -2,7 +2,7 @@ # bcftools_concat main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,116 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bcftools_concat main" - echo "" - echo "Concatenate or combine VCF/BCF files. All source files must have the same sample" - echo "columns appearing in the same order. The program can be used, for example, to" - echo "concatenate chromosome VCFs into one VCF, or combine a SNP VCF and an indel" - echo "VCF into one. The input files must be sorted by chr and position. The files" - echo "must be given in the correct order to produce sorted VCF on output unless" - echo "the -a, --allow-overlaps option is specified. With the --naive option, the files" - echo "are concatenated without being recompressed, which is very fast." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, multiple values allowed, file must exist" - echo " Input VCF/BCF files to concatenate." - echo "" - echo " -f, --file_list" - echo " type: file, file must exist" - echo " Read the list of VCF/BCF files from a file, one file name per line." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output concatenated VCF/BCF file." - echo "" - echo "Options:" - echo " -a, --allow_overlaps" - echo " type: boolean_true" - echo " First coordinate of the next file can precede last record of the current" - echo " file." - echo "" - echo " -c, --compact_PS" - echo " type: boolean_true" - echo " Do not output PS tag at each site, only at the start of a new phase set" - echo " block." - echo "" - echo " -d, --remove_duplicates" - echo " type: string" - echo " choices: [ snps, indels, both, all, exact, none ]" - echo " Output duplicate records present in multiple files only once:" - echo " ." - echo "" - echo " -l, --ligate" - echo " type: boolean_true" - echo " Ligate phased VCFs by matching phase at overlapping haplotypes." - echo "" - echo " --ligate_force" - echo " type: boolean_true" - echo " Ligate even non-overlapping chunks, keep all sites." - echo "" - echo " --ligate_warn" - echo " type: boolean_true" - echo " Drop sites in imperfect overlaps." - echo "" - echo " --no_version" - echo " type: boolean_true" - echo " Do not append version and command line information to the header." - echo "" - echo " -n, --naive" - echo " type: boolean_true" - echo " Concatenate files without recompression, a header check compatibility is" - echo " performed." - echo "" - echo " --naive_force" - echo " type: boolean_true" - echo " Same as --naive, but header compatibility is not checked." - echo " Dangerous, use with caution." - echo "" - echo " -O, --output_type" - echo " type: string" - echo " choices: [ u, z, b, v ]" - echo " Output type:" - echo " u: uncompressed BCF" - echo " z: compressed VCF" - echo " b: compressed BCF" - echo " v: uncompressed VCF" - echo "" - echo " -q, --min_PQ" - echo " type: integer" - echo " example: 30" - echo " Break phase set if phasing quality is lower than ." - echo "" - echo " -r, --regions" - echo " type: string" - echo " example: 20:1000000-2000000" - echo " Restrict to comma-separated list of regions." - echo " Following formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…​]." - echo "" - echo " -R, --regions_file" - echo " type: file, file must exist" - echo " Restrict to regions listed in a file." - echo " Regions can be specified either on a VCF, BED, or tab-delimited file" - echo " (the default)." - echo " For more information check manual." - echo "" - echo " --regions_overlap" - echo " type: string" - echo " choices: [ pos, record, variant, 0, 1, 2 ]" - echo " This option controls how overlapping records are determined:" - echo " set to 'pos' or '0' if the VCF record has to have POS inside a region" - echo " (this corresponds to the default behavior of -t/-T);" - echo " set to 'record' or '1' if also overlapping records with POS outside a" - echo " region should be included (this is the default behavior of -r/-R," - echo " and includes indels with POS at the end of a region, which are" - echo " technically outside the region);" - echo " or set to 'variant' or '2' to include only true overlapping variation" - echo " (compare the full VCF representation \"TA>T-\" vs the true sequence" - echo " variation \"A>-\")." -} # initialise variables VIASH_MODE='run' @@ -566,9 +456,9 @@ RUN echo "bcftools: \"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftoo LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bcftools bcftools_concat" -LABEL org.opencontainers.image.created="2024-12-03T10:34:02Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:36Z" LABEL org.opencontainers.image.source="https://github.com/samtools/bcftools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -683,6 +573,142 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bcftools_concat main" + echo "" + echo "Concatenate or combine VCF/BCF files. All source files must have the same sample" + echo "columns appearing in the same order. The program can be used, for example, to" + echo "concatenate chromosome VCFs into one VCF, or combine a SNP VCF and an indel" + echo "VCF into one. The input files must be sorted by chr and position. The files" + echo "must be given in the correct order to produce sorted VCF on output unless" + echo "the -a, --allow-overlaps option is specified. With the --naive option, the files" + echo "are concatenated without being recompressed, which is very fast." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, multiple values allowed, file must exist" + echo " Input VCF/BCF files to concatenate." + echo "" + echo " -f, --file_list" + echo " type: file, file must exist" + echo " Read the list of VCF/BCF files from a file, one file name per line." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " Output concatenated VCF/BCF file." + echo "" + echo "Options:" + echo " -a, --allow_overlaps" + echo " type: boolean_true" + echo " First coordinate of the next file can precede last record of the current" + echo " file." + echo "" + echo " -c, --compact_PS" + echo " type: boolean_true" + echo " Do not output PS tag at each site, only at the start of a new phase set" + echo " block." + echo "" + echo " -d, --remove_duplicates" + echo " type: string" + echo " choices: [ snps, indels, both, all, exact, none ]" + echo " Output duplicate records present in multiple files only once:" + echo " ." + echo "" + echo " -l, --ligate" + echo " type: boolean_true" + echo " Ligate phased VCFs by matching phase at overlapping haplotypes." + echo "" + echo " --ligate_force" + echo " type: boolean_true" + echo " Ligate even non-overlapping chunks, keep all sites." + echo "" + echo " --ligate_warn" + echo " type: boolean_true" + echo " Drop sites in imperfect overlaps." + echo "" + echo " --no_version" + echo " type: boolean_true" + echo " Do not append version and command line information to the header." + echo "" + echo " -n, --naive" + echo " type: boolean_true" + echo " Concatenate files without recompression, a header check compatibility is" + echo " performed." + echo "" + echo " --naive_force" + echo " type: boolean_true" + echo " Same as --naive, but header compatibility is not checked." + echo " Dangerous, use with caution." + echo "" + echo " -O, --output_type" + echo " type: string" + echo " choices: [ u, z, b, v ]" + echo " Output type:" + echo " u: uncompressed BCF" + echo " z: compressed VCF" + echo " b: compressed BCF" + echo " v: uncompressed VCF" + echo "" + echo " -q, --min_PQ" + echo " type: integer" + echo " example: 30" + echo " Break phase set if phasing quality is lower than ." + echo "" + echo " -r, --regions" + echo " type: string" + echo " example: 20:1000000-2000000" + echo " Restrict to comma-separated list of regions." + echo " Following formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…​]." + echo "" + echo " -R, --regions_file" + echo " type: file, file must exist" + echo " Restrict to regions listed in a file." + echo " Regions can be specified either on a VCF, BED, or tab-delimited file" + echo " (the default)." + echo " For more information check manual." + echo "" + echo " --regions_overlap" + echo " type: string" + echo " choices: [ pos, record, variant, 0, 1, 2 ]" + echo " This option controls how overlapping records are determined:" + echo " set to 'pos' or '0' if the VCF record has to have POS inside a region" + echo " (this corresponds to the default behavior of -t/-T);" + echo " set to 'record' or '1' if also overlapping records with POS outside a" + echo " region should be included (this is the default behavior of -r/-R," + echo " and includes indels with POS at the end of a region, which are" + echo " technically outside the region);" + echo " or set to 'variant' or '2' to include only true overlapping variation" + echo " (compare the full VCF representation \"TA>T-\" vs the true sequence" + echo " variation \"A>-\")." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bcftools/bcftools_norm/.config.vsh.yaml b/target/executable/bcftools/bcftools_norm/.config.vsh.yaml index 6b1552e2..a203a903 100644 --- a/target/executable/bcftools/bcftools_norm/.config.vsh.yaml +++ b/target/executable/bcftools/bcftools_norm/.config.vsh.yaml @@ -302,6 +302,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -415,16 +418,16 @@ build_info: engine: "docker|native" output: "target/executable/bcftools/bcftools_norm" executable: "target/executable/bcftools/bcftools_norm/bcftools_norm" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bcftools/bcftools_norm/bcftools_norm b/target/executable/bcftools/bcftools_norm/bcftools_norm index 3e78cb60..72c0c76a 100755 --- a/target/executable/bcftools/bcftools_norm/bcftools_norm +++ b/target/executable/bcftools/bcftools_norm/bcftools_norm @@ -2,7 +2,7 @@ # bcftools_norm main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,139 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bcftools_norm main" - echo "" - echo "Left-align and normalize indels, check if REF alleles match the reference, split" - echo "multiallelic sites into multiple rows;" - echo "recover multiallelics from multiple rows." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input VCF/BCF file." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output normalized VCF/BCF file." - echo "" - echo "Options:" - echo " -a, --atomize" - echo " type: boolean_true" - echo " Decompose complex variants (e.g., MNVs become consecutive SNVs)." - echo "" - echo " --atom_overlaps" - echo " type: string" - echo " choices: [ ., * ]" - echo " Use the star allele (*) for overlapping alleles or set to missing (.)." - echo "" - echo " -c, --check_ref" - echo " type: string" - echo " choices: [ e, w, x, s ]" - echo " Check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad" - echo " sites." - echo "" - echo " -d, --remove_duplicates" - echo " type: string" - echo " choices: [ snps, indels, both, all, exact, none ]" - echo " Remove duplicate snps, indels, both, all, exact matches, or none (old -D" - echo " option)." - echo "" - echo " -f, --fasta_ref" - echo " type: file, file must exist" - echo " Reference fasta sequence file." - echo "" - echo " --force" - echo " type: boolean_true" - echo " Try to proceed even if malformed tags are encountered." - echo " Experimental, use at your own risk." - echo "" - echo " --keep_sum" - echo " type: string" - echo " Keep vector sum constant when splitting multiallelics (see github issue" - echo " #360)." - echo "" - echo " -m, --multiallelics" - echo " type: string" - echo " choices: [ +snps, +indels, +both, +any, -snps, -indels, -both, -any ]" - echo " Split multiallelics (-) or join biallelics (+), type: snps, indels," - echo " both, any [default: both]." - echo "" - echo " --no_version" - echo " type: boolean_true" - echo " Do not append version and command line information to the header." - echo "" - echo " -N, --do_not_normalize" - echo " type: boolean_true" - echo " Do not normalize indels (with -m or -c s)." - echo "" - echo " --O, --output_type" - echo " type: string" - echo " choices: [ u, z, b, v ]" - echo " Output type:" - echo " u: uncompressed BCF" - echo " z: compressed VCF" - echo " b: compressed BCF" - echo " v: uncompressed VCF" - echo "" - echo " --old_rec_tag" - echo " type: string" - echo " Annotate modified records with INFO/STR indicating the original variant." - echo "" - echo " --r, --regions" - echo " type: string" - echo " example: 20:1000000-2000000" - echo " Restrict to comma-separated list of regions." - echo " Following formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…​]." - echo "" - echo " --R, --regions_file" - echo " type: file, file must exist" - echo " Restrict to regions listed in a file." - echo " Regions can be specified either on a VCF, BED, or tab-delimited file" - echo " (the default)." - echo " For more information check manual." - echo "" - echo " --regions_overlap" - echo " type: string" - echo " choices: [ pos, record, variant, 0, 1, 2 ]" - echo " This option controls how overlapping records are determined:" - echo " set to 'pos' or '0' if the VCF record has to have POS inside a region" - echo " (this corresponds to the default behavior of -t/-T);" - echo " set to 'record' or '1' if also overlapping records with POS outside a" - echo " region should be included (this is the default behavior of -r/-R," - echo " and includes indels with POS at the end of a region, which are" - echo " technically outside the region);" - echo " or set to 'variant' or '2' to include only true overlapping variation" - echo " (compare the full VCF representation \"TA>T-\" vs the true sequence" - echo " variation \"A>-\")." - echo "" - echo " -w, --site_win" - echo " type: integer" - echo " Buffer for sorting lines that changed position during realignment." - echo "" - echo " -s, --strict_filter" - echo " type: boolean_true" - echo " When merging (-m+), merged site is PASS only if all sites being merged" - echo " PASS." - echo "" - echo " -t, --targets" - echo " type: string" - echo " example: 20:1000000-2000000" - echo " Similar to --regions but streams rather than index-jumps." - echo "" - echo " -T, --targets_file" - echo " type: file, file must exist" - echo " Similar to --regions_file but streams rather than index-jumps." - echo "" - echo " --targets_overlap" - echo " type: string" - echo " choices: [ pos, record, variant, 0, 1, 2 ]" - echo " Include if POS in the region (0), record overlaps (1), variant overlaps" - echo " (2)." - echo " Similar to --regions_overlap." -} # initialise variables VIASH_MODE='run' @@ -589,9 +456,9 @@ RUN echo "bcftools: \"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftoo LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bcftools bcftools_norm" -LABEL org.opencontainers.image.created="2024-12-03T10:34:03Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:46Z" LABEL org.opencontainers.image.source="https://github.com/samtools/bcftools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -706,6 +573,165 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bcftools_norm main" + echo "" + echo "Left-align and normalize indels, check if REF alleles match the reference, split" + echo "multiallelic sites into multiple rows;" + echo "recover multiallelics from multiple rows." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input VCF/BCF file." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " Output normalized VCF/BCF file." + echo "" + echo "Options:" + echo " -a, --atomize" + echo " type: boolean_true" + echo " Decompose complex variants (e.g., MNVs become consecutive SNVs)." + echo "" + echo " --atom_overlaps" + echo " type: string" + echo " choices: [ ., * ]" + echo " Use the star allele (*) for overlapping alleles or set to missing (.)." + echo "" + echo " -c, --check_ref" + echo " type: string" + echo " choices: [ e, w, x, s ]" + echo " Check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad" + echo " sites." + echo "" + echo " -d, --remove_duplicates" + echo " type: string" + echo " choices: [ snps, indels, both, all, exact, none ]" + echo " Remove duplicate snps, indels, both, all, exact matches, or none (old -D" + echo " option)." + echo "" + echo " -f, --fasta_ref" + echo " type: file, file must exist" + echo " Reference fasta sequence file." + echo "" + echo " --force" + echo " type: boolean_true" + echo " Try to proceed even if malformed tags are encountered." + echo " Experimental, use at your own risk." + echo "" + echo " --keep_sum" + echo " type: string" + echo " Keep vector sum constant when splitting multiallelics (see github issue" + echo " #360)." + echo "" + echo " -m, --multiallelics" + echo " type: string" + echo " choices: [ +snps, +indels, +both, +any, -snps, -indels, -both, -any ]" + echo " Split multiallelics (-) or join biallelics (+), type: snps, indels," + echo " both, any [default: both]." + echo "" + echo " --no_version" + echo " type: boolean_true" + echo " Do not append version and command line information to the header." + echo "" + echo " -N, --do_not_normalize" + echo " type: boolean_true" + echo " Do not normalize indels (with -m or -c s)." + echo "" + echo " --O, --output_type" + echo " type: string" + echo " choices: [ u, z, b, v ]" + echo " Output type:" + echo " u: uncompressed BCF" + echo " z: compressed VCF" + echo " b: compressed BCF" + echo " v: uncompressed VCF" + echo "" + echo " --old_rec_tag" + echo " type: string" + echo " Annotate modified records with INFO/STR indicating the original variant." + echo "" + echo " --r, --regions" + echo " type: string" + echo " example: 20:1000000-2000000" + echo " Restrict to comma-separated list of regions." + echo " Following formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…​]." + echo "" + echo " --R, --regions_file" + echo " type: file, file must exist" + echo " Restrict to regions listed in a file." + echo " Regions can be specified either on a VCF, BED, or tab-delimited file" + echo " (the default)." + echo " For more information check manual." + echo "" + echo " --regions_overlap" + echo " type: string" + echo " choices: [ pos, record, variant, 0, 1, 2 ]" + echo " This option controls how overlapping records are determined:" + echo " set to 'pos' or '0' if the VCF record has to have POS inside a region" + echo " (this corresponds to the default behavior of -t/-T);" + echo " set to 'record' or '1' if also overlapping records with POS outside a" + echo " region should be included (this is the default behavior of -r/-R," + echo " and includes indels with POS at the end of a region, which are" + echo " technically outside the region);" + echo " or set to 'variant' or '2' to include only true overlapping variation" + echo " (compare the full VCF representation \"TA>T-\" vs the true sequence" + echo " variation \"A>-\")." + echo "" + echo " -w, --site_win" + echo " type: integer" + echo " Buffer for sorting lines that changed position during realignment." + echo "" + echo " -s, --strict_filter" + echo " type: boolean_true" + echo " When merging (-m+), merged site is PASS only if all sites being merged" + echo " PASS." + echo "" + echo " -t, --targets" + echo " type: string" + echo " example: 20:1000000-2000000" + echo " Similar to --regions but streams rather than index-jumps." + echo "" + echo " -T, --targets_file" + echo " type: file, file must exist" + echo " Similar to --regions_file but streams rather than index-jumps." + echo "" + echo " --targets_overlap" + echo " type: string" + echo " choices: [ pos, record, variant, 0, 1, 2 ]" + echo " Include if POS in the region (0), record overlaps (1), variant overlaps" + echo " (2)." + echo " Similar to --regions_overlap." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bcftools/bcftools_sort/.config.vsh.yaml b/target/executable/bcftools/bcftools_sort/.config.vsh.yaml index 3c4bd419..48d0ff9d 100644 --- a/target/executable/bcftools/bcftools_sort/.config.vsh.yaml +++ b/target/executable/bcftools/bcftools_sort/.config.vsh.yaml @@ -76,6 +76,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -184,16 +187,16 @@ build_info: engine: "docker|native" output: "target/executable/bcftools/bcftools_sort" executable: "target/executable/bcftools/bcftools_sort/bcftools_sort" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bcftools/bcftools_sort/bcftools_sort b/target/executable/bcftools/bcftools_sort/bcftools_sort index 2b5f3881..7d17c81e 100755 --- a/target/executable/bcftools/bcftools_sort/bcftools_sort +++ b/target/executable/bcftools/bcftools_sort/bcftools_sort @@ -2,7 +2,7 @@ # bcftools_sort main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,33 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bcftools_sort main" - echo "" - echo "Sorts VCF/BCF files." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input VCF/BCF file." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output sorted VCF/BCF file." - echo "" - echo "Options:" - echo " -O, --output_type" - echo " type: string" - echo " choices: [ b, u, z, v ]" - echo " Compresses or uncompresses the output." - echo " The options are:" - echo " b: compressed BCF," - echo " u: uncompressed BCF," - echo " z: compressed VCF," - echo " v: uncompressed VCF." -} # initialise variables VIASH_MODE='run' @@ -483,9 +456,9 @@ RUN echo "bcftools: \"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftoo LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bcftools bcftools_sort" -LABEL org.opencontainers.image.created="2024-12-03T10:34:02Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:35Z" LABEL org.opencontainers.image.source="https://github.com/samtools/bcftools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -600,6 +573,59 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bcftools_sort main" + echo "" + echo "Sorts VCF/BCF files." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input VCF/BCF file." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " Output sorted VCF/BCF file." + echo "" + echo "Options:" + echo " -O, --output_type" + echo " type: string" + echo " choices: [ b, u, z, v ]" + echo " Compresses or uncompresses the output." + echo " The options are:" + echo " b: compressed BCF," + echo " u: uncompressed BCF," + echo " z: compressed VCF," + echo " v: uncompressed VCF." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bcftools/bcftools_stats/.config.vsh.yaml b/target/executable/bcftools/bcftools_stats/.config.vsh.yaml index 3cc2973e..9625263c 100644 --- a/target/executable/bcftools/bcftools_stats/.config.vsh.yaml +++ b/target/executable/bcftools/bcftools_stats/.config.vsh.yaml @@ -344,6 +344,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -457,16 +460,16 @@ build_info: engine: "docker|native" output: "target/executable/bcftools/bcftools_stats" executable: "target/executable/bcftools/bcftools_stats/bcftools_stats" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bcftools/bcftools_stats/bcftools_stats b/target/executable/bcftools/bcftools_stats/bcftools_stats index 06a2d5e2..6097790f 100755 --- a/target/executable/bcftools/bcftools_stats/bcftools_stats +++ b/target/executable/bcftools/bcftools_stats/bcftools_stats @@ -2,7 +2,7 @@ # bcftools_stats main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,176 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bcftools_stats main" - echo "" - echo "Parses VCF or BCF and produces a txt stats file which can be plotted using" - echo "plot-vcfstats." - echo "When two files are given, the program generates separate stats for intersection" - echo "and the complements. By default only sites are compared, -s/-S must given to" - echo "include" - echo "also sample columns." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " Input VCF/BCF file. Maximum of two files." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output txt statistics file." - echo "" - echo "Options:" - echo " --af_bins, --allele_frequency_bins" - echo " type: string" - echo " example: 0.1,0.5,1" - echo " Allele frequency bins, a list of bin values (0.1,0.5,1)." - echo "" - echo " --af_bins_file, --allele_frequency_bins_file" - echo " type: file, file must exist" - echo " Same as allele_frequency_bins, but in a file." - echo " Format of file is one value per line." - echo " e.g." - echo " 0.1" - echo " 0.5" - echo " 1" - echo "" - echo " --af_tag, --allele_frequency_tag" - echo " type: string" - echo " Allele frequency tag to use, by default estimated from AN,AC or GT." - echo "" - echo " --first_only, --first_allele_only" - echo " type: boolean_true" - echo " Include only 1st allele at multiallelic sites." - echo "" - echo " --c, --collapse" - echo " type: string" - echo " choices: [ snps, indels, both, all, some, none ]" - echo " Treat as identical records with ." - echo " See https://samtools.github.io/bcftools/bcftools.html#common_options for" - echo " details." - echo "" - echo " --d, --depth" - echo " type: string" - echo " example: 0,500,1" - echo " Depth distribution: min,max,bin size." - echo "" - echo " --e, --exclude" - echo " type: string" - echo " example: QUAL < 30 && DP < 10" - echo " Exclude sites for which the expression is true." - echo " See https://samtools.github.io/bcftools/bcftools.html#expressions for" - echo " details." - echo "" - echo " --E, --exons" - echo " type: file, file must exist" - echo " tab-delimited file with exons for indel frameshifts statistics." - echo " The columns of the file are CHR, FROM, TO, with 1-based, inclusive," - echo " positions." - echo " The file is BGZF-compressed and indexed with tabix (e.g. tabix -s1 -b2" - echo " -e3 file.gz)." - echo "" - echo " --f, --apply_filters" - echo " type: string" - echo " Require at least one of the listed FILTER strings (e.g. \"PASS,.\")." - echo "" - echo " --F, --fasta_reference" - echo " type: file, file must exist" - echo " Faidx indexed reference sequence file to determine INDEL context." - echo "" - echo " --i, --include" - echo " type: string" - echo " example: QUAL >= 30 && DP >= 10" - echo " Select sites for which the expression is true." - echo " See https://samtools.github.io/bcftools/bcftools.html#expressions for" - echo " details." - echo "" - echo " --I, --split_by_ID" - echo " type: boolean_true" - echo " Collect stats for sites with ID separately (known vs novel)." - echo "" - echo " --r, --regions" - echo " type: string" - echo " example: 20:1000000-2000000" - echo " Restrict to comma-separated list of regions." - echo " Following formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…​]." - echo "" - echo " --R, --regions_file" - echo " type: file, file must exist" - echo " Restrict to regions listed in a file." - echo " Regions can be specified either on a VCF, BED, or tab-delimited file" - echo " (the default)." - echo " For more information check manual." - echo "" - echo " --regions_overlap" - echo " type: string" - echo " choices: [ pos, record, variant, 0, 1, 2 ]" - echo " This option controls how overlapping records are determined:" - echo " set to 'pos' or '0' if the VCF record has to have POS inside a region" - echo " (this corresponds to the default behavior of -t/-T);" - echo " set to 'record' or '1' if also overlapping records with POS outside a" - echo " region should be included (this is the default behavior of -r/-R," - echo " and includes indels with POS at the end of a region, which are" - echo " technically outside the region);" - echo " or set to 'variant' or '2' to include only true overlapping variation" - echo " (compare the full VCF representation \"TA>T-\" vs the true sequence" - echo " variation \"A>-\")." - echo "" - echo " --s, --samples" - echo " type: string" - echo " List of samples for sample stats, \"-\" to include all samples." - echo "" - echo " --S, --samples_file" - echo " type: file, file must exist" - echo " File of samples to include." - echo " e.g." - echo " sample1 1" - echo " sample2 2" - echo " sample3 2" - echo "" - echo " --t, --targets" - echo " type: string" - echo " example: 20:1000000-2000000" - echo " Similar as -r, --regions, but the next position is accessed by streaming" - echo " the whole VCF/BCF" - echo " rather than using the tbi/csi index. Both -r and -t options can be" - echo " applied simultaneously: -r uses the" - echo " index to jump to a region and -t discards positions which are not in the" - echo " targets. Unlike -r, targets" - echo " can be prefixed with \"^\" to request logical complement. For example," - echo " \"^X,Y,MT\" indicates that" - echo " sequences X, Y and MT should be skipped. Yet another difference between" - echo " the -t/-T and -r/-R is" - echo " that -r/-R checks for proper overlaps and considers both POS and the end" - echo " position of an indel," - echo " while -t/-T considers the POS coordinate only (by default; see also" - echo " --regions-overlap and --targets-overlap)." - echo " Note that -t cannot be used in combination with -T." - echo " Following formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…​]." - echo "" - echo " --T, --targets_file" - echo " type: file, file must exist" - echo " Similar to --regions_file option but streams rather than index-jumps." - echo "" - echo " --targets_overlaps" - echo " type: string" - echo " choices: [ pos, record, variant, 0, 1, 2 ]" - echo " Include if POS in the region (0), record overlaps (1), variant overlaps" - echo " (2)." - echo "" - echo " --u, --user_tstv" - echo " type: string" - echo " Collect Ts/Tv stats for any tag using the given binning [0:1:100]." - echo " Format is ." - echo " A subfield can be selected as e.g. 'PV4[0]', here the first value of the" - echo " PV4 tag." - echo "" - echo " --v, --verbose" - echo " type: boolean_true" - echo " Produce verbose per-site and per-sample output." -} # initialise variables VIASH_MODE='run' @@ -626,9 +456,9 @@ RUN echo "bcftools: \"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftoo LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bcftools bcftools_stats" -LABEL org.opencontainers.image.created="2024-12-03T10:34:03Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:35Z" LABEL org.opencontainers.image.source="https://github.com/samtools/bcftools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -743,6 +573,202 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bcftools_stats main" + echo "" + echo "Parses VCF or BCF and produces a txt stats file which can be plotted using" + echo "plot-vcfstats." + echo "When two files are given, the program generates separate stats for intersection" + echo "and the complements. By default only sites are compared, -s/-S must given to" + echo "include" + echo "also sample columns." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " Input VCF/BCF file. Maximum of two files." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " Output txt statistics file." + echo "" + echo "Options:" + echo " --af_bins, --allele_frequency_bins" + echo " type: string" + echo " example: 0.1,0.5,1" + echo " Allele frequency bins, a list of bin values (0.1,0.5,1)." + echo "" + echo " --af_bins_file, --allele_frequency_bins_file" + echo " type: file, file must exist" + echo " Same as allele_frequency_bins, but in a file." + echo " Format of file is one value per line." + echo " e.g." + echo " 0.1" + echo " 0.5" + echo " 1" + echo "" + echo " --af_tag, --allele_frequency_tag" + echo " type: string" + echo " Allele frequency tag to use, by default estimated from AN,AC or GT." + echo "" + echo " --first_only, --first_allele_only" + echo " type: boolean_true" + echo " Include only 1st allele at multiallelic sites." + echo "" + echo " --c, --collapse" + echo " type: string" + echo " choices: [ snps, indels, both, all, some, none ]" + echo " Treat as identical records with ." + echo " See https://samtools.github.io/bcftools/bcftools.html#common_options for" + echo " details." + echo "" + echo " --d, --depth" + echo " type: string" + echo " example: 0,500,1" + echo " Depth distribution: min,max,bin size." + echo "" + echo " --e, --exclude" + echo " type: string" + echo " example: QUAL < 30 && DP < 10" + echo " Exclude sites for which the expression is true." + echo " See https://samtools.github.io/bcftools/bcftools.html#expressions for" + echo " details." + echo "" + echo " --E, --exons" + echo " type: file, file must exist" + echo " tab-delimited file with exons for indel frameshifts statistics." + echo " The columns of the file are CHR, FROM, TO, with 1-based, inclusive," + echo " positions." + echo " The file is BGZF-compressed and indexed with tabix (e.g. tabix -s1 -b2" + echo " -e3 file.gz)." + echo "" + echo " --f, --apply_filters" + echo " type: string" + echo " Require at least one of the listed FILTER strings (e.g. \"PASS,.\")." + echo "" + echo " --F, --fasta_reference" + echo " type: file, file must exist" + echo " Faidx indexed reference sequence file to determine INDEL context." + echo "" + echo " --i, --include" + echo " type: string" + echo " example: QUAL >= 30 && DP >= 10" + echo " Select sites for which the expression is true." + echo " See https://samtools.github.io/bcftools/bcftools.html#expressions for" + echo " details." + echo "" + echo " --I, --split_by_ID" + echo " type: boolean_true" + echo " Collect stats for sites with ID separately (known vs novel)." + echo "" + echo " --r, --regions" + echo " type: string" + echo " example: 20:1000000-2000000" + echo " Restrict to comma-separated list of regions." + echo " Following formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…​]." + echo "" + echo " --R, --regions_file" + echo " type: file, file must exist" + echo " Restrict to regions listed in a file." + echo " Regions can be specified either on a VCF, BED, or tab-delimited file" + echo " (the default)." + echo " For more information check manual." + echo "" + echo " --regions_overlap" + echo " type: string" + echo " choices: [ pos, record, variant, 0, 1, 2 ]" + echo " This option controls how overlapping records are determined:" + echo " set to 'pos' or '0' if the VCF record has to have POS inside a region" + echo " (this corresponds to the default behavior of -t/-T);" + echo " set to 'record' or '1' if also overlapping records with POS outside a" + echo " region should be included (this is the default behavior of -r/-R," + echo " and includes indels with POS at the end of a region, which are" + echo " technically outside the region);" + echo " or set to 'variant' or '2' to include only true overlapping variation" + echo " (compare the full VCF representation \"TA>T-\" vs the true sequence" + echo " variation \"A>-\")." + echo "" + echo " --s, --samples" + echo " type: string" + echo " List of samples for sample stats, \"-\" to include all samples." + echo "" + echo " --S, --samples_file" + echo " type: file, file must exist" + echo " File of samples to include." + echo " e.g." + echo " sample1 1" + echo " sample2 2" + echo " sample3 2" + echo "" + echo " --t, --targets" + echo " type: string" + echo " example: 20:1000000-2000000" + echo " Similar as -r, --regions, but the next position is accessed by streaming" + echo " the whole VCF/BCF" + echo " rather than using the tbi/csi index. Both -r and -t options can be" + echo " applied simultaneously: -r uses the" + echo " index to jump to a region and -t discards positions which are not in the" + echo " targets. Unlike -r, targets" + echo " can be prefixed with \"^\" to request logical complement. For example," + echo " \"^X,Y,MT\" indicates that" + echo " sequences X, Y and MT should be skipped. Yet another difference between" + echo " the -t/-T and -r/-R is" + echo " that -r/-R checks for proper overlaps and considers both POS and the end" + echo " position of an indel," + echo " while -t/-T considers the POS coordinate only (by default; see also" + echo " --regions-overlap and --targets-overlap)." + echo " Note that -t cannot be used in combination with -T." + echo " Following formats are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…​]." + echo "" + echo " --T, --targets_file" + echo " type: file, file must exist" + echo " Similar to --regions_file option but streams rather than index-jumps." + echo "" + echo " --targets_overlaps" + echo " type: string" + echo " choices: [ pos, record, variant, 0, 1, 2 ]" + echo " Include if POS in the region (0), record overlaps (1), variant overlaps" + echo " (2)." + echo "" + echo " --u, --user_tstv" + echo " type: string" + echo " Collect Ts/Tv stats for any tag using the given binning [0:1:100]." + echo " Format is ." + echo " A subfield can be selected as e.g. 'PV4[0]', here the first value of the" + echo " PV4 tag." + echo "" + echo " --v, --verbose" + echo " type: boolean_true" + echo " Produce verbose per-site and per-sample output." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bcl_convert/.config.vsh.yaml b/target/executable/bcl_convert/.config.vsh.yaml index 03da7f15..66821963 100644 --- a/target/executable/bcl_convert/.config.vsh.yaml +++ b/target/executable/bcl_convert/.config.vsh.yaml @@ -290,6 +290,16 @@ argument_groups: direction: "output" multiple: false multiple_sep: ";" + - type: "boolean" + name: "--force" + description: "Allow destination directory to already exist and overwrite files.\n" + info: null + example: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" resources: - type: "bash_script" path: "script.sh" @@ -303,6 +313,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -417,16 +430,16 @@ build_info: engine: "docker|native" output: "target/executable/bcl_convert" executable: "target/executable/bcl_convert/bcl_convert" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bcl_convert/bcl_convert b/target/executable/bcl_convert/bcl_convert index c8e2bc66..d73d6552 100755 --- a/target/executable/bcl_convert/bcl_convert +++ b/target/executable/bcl_convert/bcl_convert @@ -2,7 +2,7 @@ # bcl_convert main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -173,143 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bcl_convert main" - echo "" - echo "Convert bcl files to fastq files using bcl-convert." - echo "Information about upgrading from bcl2fastq via" - echo "[Upgrading from bcl2fastq to BCL" - echo "Convert](https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html)" - echo "and [BCL Convert Compatible" - echo "Products](https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html)" - echo "" - echo "Input arguments:" - echo " -i, --bcl_input_directory" - echo " type: file, required parameter, file must exist" - echo " example: bcl_dir" - echo " Input run directory" - echo "" - echo " -s, --sample_sheet" - echo " type: file, file must exist" - echo " example: bcl_dir/sample_sheet.csv" - echo " Path to SampleSheet.csv file (default searched for in" - echo " --bcl_input_directory)" - echo "" - echo " --run_info" - echo " type: file, file must exist" - echo " example: bcl_dir/RunInfo.xml" - echo " Path to RunInfo.xml file (default root of BCL input directory)" - echo "" - echo "Lane and tile settings:" - echo " --bcl_only_lane" - echo " type: integer" - echo " example: 1" - echo " Convert only specified lane number (default all lanes)" - echo "" - echo " --first_tile_only" - echo " type: boolean" - echo " example: true" - echo " Only convert first tile of input (for testing & debugging)" - echo "" - echo " --tiles" - echo " type: string" - echo " example: s_[0-9]+_1" - echo " Process only a subset of tiles by a regular expression" - echo "" - echo " --exclude_tiles" - echo " type: string" - echo " example: s_[0-9]+_1" - echo " Exclude set of tiles by a regular expression" - echo "" - echo "Resource arguments:" - echo " --shared_thread_odirect_output" - echo " type: boolean" - echo " example: true" - echo " Use linux native asynchronous io (io_submit) for file output" - echo " (Default=false)" - echo "" - echo " --bcl_num_parallel_tiles" - echo " type: integer" - echo " example: 1" - echo " \\# of tiles to process in parallel (default 1)" - echo "" - echo " --bcl_num_conversion_threads" - echo " type: integer" - echo " example: 1" - echo " \\# of threads for conversion (per tile, default # cpu threads)" - echo "" - echo " --bcl_num_compression_threads" - echo " type: integer" - echo " example: 1" - echo " \\# of threads for fastq.gz output compression (per tile, default # cpu" - echo " threads, or HW+12)" - echo "" - echo " --bcl_num_decompression_threads" - echo " type: integer" - echo " example: 1" - echo " \\# of threads for bcl/cbcl input decompression (per tile, default half #" - echo " cpu threads, or HW+8). Only applies when preloading files" - echo "" - echo "Run arguments:" - echo " --bcl_only_matched_reads" - echo " type: boolean" - echo " example: true" - echo " For pure BCL conversion, do not output files for 'Undetermined'" - echo " [unmatched] reads (output by default)" - echo "" - echo " --no_lane_splitting" - echo " type: boolean" - echo " example: true" - echo " Do not split FASTQ file by lane (false by default)" - echo "" - echo " --num_unknown_barcodes_reported" - echo " type: integer" - echo " example: 1000" - echo " \\# of Top Unknown Barcodes to output (1000 by default)" - echo "" - echo " --bcl_validate_sample_sheet_only" - echo " type: boolean" - echo " example: true" - echo " Only validate RunInfo.xml & SampleSheet files (produce no FASTQ files)" - echo "" - echo " --strict_mode" - echo " type: boolean" - echo " example: true" - echo " Abort if any files are missing (false by default)" - echo "" - echo " --sample_name_column_enabled" - echo " type: boolean" - echo " example: true" - echo " Use sample sheet 'Sample_Name' column when naming fastq files &" - echo " subdirectories" - echo "" - echo "Output arguments:" - echo " -o, --output_directory" - echo " type: file, required parameter, output, file must exist" - echo " example: fastq_dir" - echo " Output directory containig fastq files" - echo "" - echo " --bcl_sampleproject_subdirectories" - echo " type: boolean" - echo " example: true" - echo " Output to subdirectories based upon sample sheet 'Sample_Project' column" - echo "" - echo " --fastq_gzip_compression_level" - echo " type: integer" - echo " example: 1" - echo " Set fastq output compression level 0-9 (default 1)" - echo "" - echo " --reports" - echo " type: file, output, file must exist" - echo " example: reports_dir" - echo " Reports directory" - echo "" - echo " --logs" - echo " type: file, output, file must exist" - echo " example: logs_dir" - echo " Reports directory" -} # initialise variables VIASH_MODE='run' @@ -599,9 +462,9 @@ RUN echo "bcl-convert: \"$(bcl-convert -V 2>&1 >/dev/null | sed -n '/Version/ s/ LABEL org.opencontainers.image.authors="Toni Verbeiren, Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component bcl_convert" -LABEL org.opencontainers.image.created="2024-12-03T10:34:01Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:44Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/biobox" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -716,6 +579,174 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bcl_convert main" + echo "" + echo "Convert bcl files to fastq files using bcl-convert." + echo "Information about upgrading from bcl2fastq via" + echo "[Upgrading from bcl2fastq to BCL" + echo "Convert](https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html)" + echo "and [BCL Convert Compatible" + echo "Products](https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html)" + echo "" + echo "Input arguments:" + echo " -i, --bcl_input_directory" + echo " type: file, required parameter, file must exist" + echo " example: bcl_dir" + echo " Input run directory" + echo "" + echo " -s, --sample_sheet" + echo " type: file, file must exist" + echo " example: bcl_dir/sample_sheet.csv" + echo " Path to SampleSheet.csv file (default searched for in" + echo " --bcl_input_directory)" + echo "" + echo " --run_info" + echo " type: file, file must exist" + echo " example: bcl_dir/RunInfo.xml" + echo " Path to RunInfo.xml file (default root of BCL input directory)" + echo "" + echo "Lane and tile settings:" + echo " --bcl_only_lane" + echo " type: integer" + echo " example: 1" + echo " Convert only specified lane number (default all lanes)" + echo "" + echo " --first_tile_only" + echo " type: boolean" + echo " example: true" + echo " Only convert first tile of input (for testing & debugging)" + echo "" + echo " --tiles" + echo " type: string" + echo " example: s_[0-9]+_1" + echo " Process only a subset of tiles by a regular expression" + echo "" + echo " --exclude_tiles" + echo " type: string" + echo " example: s_[0-9]+_1" + echo " Exclude set of tiles by a regular expression" + echo "" + echo "Resource arguments:" + echo " --shared_thread_odirect_output" + echo " type: boolean" + echo " example: true" + echo " Use linux native asynchronous io (io_submit) for file output" + echo " (Default=false)" + echo "" + echo " --bcl_num_parallel_tiles" + echo " type: integer" + echo " example: 1" + echo " \\# of tiles to process in parallel (default 1)" + echo "" + echo " --bcl_num_conversion_threads" + echo " type: integer" + echo " example: 1" + echo " \\# of threads for conversion (per tile, default # cpu threads)" + echo "" + echo " --bcl_num_compression_threads" + echo " type: integer" + echo " example: 1" + echo " \\# of threads for fastq.gz output compression (per tile, default # cpu" + echo " threads, or HW+12)" + echo "" + echo " --bcl_num_decompression_threads" + echo " type: integer" + echo " example: 1" + echo " \\# of threads for bcl/cbcl input decompression (per tile, default half #" + echo " cpu threads, or HW+8). Only applies when preloading files" + echo "" + echo "Run arguments:" + echo " --bcl_only_matched_reads" + echo " type: boolean" + echo " example: true" + echo " For pure BCL conversion, do not output files for 'Undetermined'" + echo " [unmatched] reads (output by default)" + echo "" + echo " --no_lane_splitting" + echo " type: boolean" + echo " example: true" + echo " Do not split FASTQ file by lane (false by default)" + echo "" + echo " --num_unknown_barcodes_reported" + echo " type: integer" + echo " example: 1000" + echo " \\# of Top Unknown Barcodes to output (1000 by default)" + echo "" + echo " --bcl_validate_sample_sheet_only" + echo " type: boolean" + echo " example: true" + echo " Only validate RunInfo.xml & SampleSheet files (produce no FASTQ files)" + echo "" + echo " --strict_mode" + echo " type: boolean" + echo " example: true" + echo " Abort if any files are missing (false by default)" + echo "" + echo " --sample_name_column_enabled" + echo " type: boolean" + echo " example: true" + echo " Use sample sheet 'Sample_Name' column when naming fastq files &" + echo " subdirectories" + echo "" + echo "Output arguments:" + echo " -o, --output_directory" + echo " type: file, required parameter, output, file must exist" + echo " example: fastq_dir" + echo " Output directory containig fastq files" + echo "" + echo " --bcl_sampleproject_subdirectories" + echo " type: boolean" + echo " example: true" + echo " Output to subdirectories based upon sample sheet 'Sample_Project' column" + echo "" + echo " --fastq_gzip_compression_level" + echo " type: integer" + echo " example: 1" + echo " Set fastq output compression level 0-9 (default 1)" + echo "" + echo " --reports" + echo " type: file, output, file must exist" + echo " example: reports_dir" + echo " Reports directory" + echo "" + echo " --logs" + echo " type: file, output, file must exist" + echo " example: logs_dir" + echo " Reports directory" + echo "" + echo " --force" + echo " type: boolean" + echo " example: true" + echo " Allow destination directory to already exist and overwrite files." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' @@ -1012,6 +1043,17 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_LOGS=$(ViashRemoveFlags "$1") shift 1 ;; + --force) + [ -n "$VIASH_PAR_FORCE" ] && ViashError Bad arguments for option \'--force\': \'$VIASH_PAR_FORCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FORCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --force. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --force=*) + [ -n "$VIASH_PAR_FORCE" ] && ViashError Bad arguments for option \'--force=*\': \'$VIASH_PAR_FORCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FORCE=$(ViashRemoveFlags "$1") + shift 1 + ;; ---engine) VIASH_ENGINE_ID="$2" shift 2 @@ -1322,6 +1364,12 @@ if [[ -n "$VIASH_PAR_FASTQ_GZIP_COMPRESSION_LEVEL" ]]; then exit 1 fi fi +if [[ -n "$VIASH_PAR_FORCE" ]]; then + if ! [[ "$VIASH_PAR_FORCE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--force' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi if [[ -n "$VIASH_META_CPUS" ]]; then if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. @@ -1537,6 +1585,7 @@ $( if [ ! -z ${VIASH_PAR_BCL_SAMPLEPROJECT_SUBDIRECTORIES+x} ]; then echo "${VIA $( if [ ! -z ${VIASH_PAR_FASTQ_GZIP_COMPRESSION_LEVEL+x} ]; then echo "${VIASH_PAR_FASTQ_GZIP_COMPRESSION_LEVEL}" | sed "s#'#'\"'\"'#g;s#.*#par_fastq_gzip_compression_level='&'#" ; else echo "# par_fastq_gzip_compression_level="; fi ) $( if [ ! -z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\"'\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) $( if [ ! -z ${VIASH_PAR_LOGS+x} ]; then echo "${VIASH_PAR_LOGS}" | sed "s#'#'\"'\"'#g;s#.*#par_logs='&'#" ; else echo "# par_logs="; fi ) +$( if [ ! -z ${VIASH_PAR_FORCE+x} ]; then echo "${VIASH_PAR_FORCE}" | sed "s#'#'\"'\"'#g;s#.*#par_force='&'#" ; else echo "# par_force="; fi ) $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) @@ -1561,9 +1610,13 @@ $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" set -eo pipefail +[[ "\$par_force" == "false" ]] && unset par_force + + \$(which bcl-convert) \\ --bcl-input-directory "\$par_bcl_input_directory" \\ --output-directory "\$par_output_directory" \\ + \${par_force:+--force} \\ \${par_sample_sheet:+ --sample-sheet "\$par_sample_sheet"} \\ \${par_run_info:+ --run-info "\$par_run_info"} \\ \${par_bcl_only_lane:+ --bcl-only-lane "\$par_bcl_only_lane"} \\ diff --git a/target/executable/bd_rhapsody/bd_rhapsody_make_reference/.config.vsh.yaml b/target/executable/bd_rhapsody/bd_rhapsody_make_reference/.config.vsh.yaml index 4b94e028..225fbbf3 100644 --- a/target/executable/bd_rhapsody/bd_rhapsody_make_reference/.config.vsh.yaml +++ b/target/executable/bd_rhapsody/bd_rhapsody_make_reference/.config.vsh.yaml @@ -159,6 +159,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -273,16 +276,16 @@ build_info: engine: "docker|native" output: "target/executable/bd_rhapsody/bd_rhapsody_make_reference" executable: "target/executable/bd_rhapsody/bd_rhapsody_make_reference/bd_rhapsody_make_reference" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bd_rhapsody/bd_rhapsody_make_reference/bd_rhapsody_make_reference b/target/executable/bd_rhapsody/bd_rhapsody_make_reference/bd_rhapsody_make_reference index 793bbbb8..be802d32 100755 --- a/target/executable/bd_rhapsody/bd_rhapsody_make_reference/bd_rhapsody_make_reference +++ b/target/executable/bd_rhapsody/bd_rhapsody_make_reference/bd_rhapsody_make_reference @@ -2,7 +2,7 @@ # bd_rhapsody_make_reference main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -173,95 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bd_rhapsody_make_reference main" - echo "" - echo "The Reference Files Generator creates an archive containing Genome Index" - echo "and Transcriptome annotation files needed for the BD Rhapsody Sequencing" - echo "Analysis Pipeline. The app takes as input one or more FASTA and GTF files" - echo "and produces a compressed archive in the form of a tar.gz file. The" - echo "archive contains:" - echo "" - echo "- STAR index" - echo "- Filtered GTF file" - echo "" - echo "Inputs:" - echo " --genome_fasta" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: genome_sequence.fa.gz" - echo " Reference genome file in FASTA or FASTA.GZ format. The BD Rhapsody" - echo " Sequencing Analysis Pipeline uses GRCh38 for Human and GRCm39 for Mouse." - echo "" - echo " --gtf" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: transcriptome_annotation.gtf.gz" - echo " File path to the transcript annotation files in GTF or GTF.GZ format." - echo " The Sequence Analysis Pipeline requires the 'gene_name' or" - echo " 'gene_id' attribute to be set on each gene and exon feature. Gene and" - echo " exon feature lines must have the same attribute, and exons" - echo " must have a corresponding gene with the same value. For TCR/BCR assays," - echo " the TCR or BCR gene segments must have the 'gene_type' or" - echo " 'gene_biotype' attribute set, and the value should begin with 'TR' or" - echo " 'IG', respectively." - echo "" - echo " --extra_sequences" - echo " type: file, multiple values allowed, file must exist" - echo " File path to additional sequences in FASTA format to use when building" - echo " the STAR index. (e.g. transgenes or CRISPR guide barcodes)." - echo " GTF lines for these sequences will be automatically generated and" - echo " combined with the main GTF." - echo "" - echo "Outputs:" - echo " --reference_archive" - echo " type: file, required parameter, output, file must exist" - echo " example: star_index.tar.gz" - echo " A Compressed archive containing the Reference Genome Index and" - echo " annotation GTF files. This archive is meant to be used as an" - echo " input in the BD Rhapsody Sequencing Analysis Pipeline." - echo "" - echo "Arguments:" - echo " --mitochondrial_contigs" - echo " type: string, multiple values allowed" - echo " default: chrM;chrMT;M;MT" - echo " Names of the Mitochondrial contigs in the provided Reference Genome." - echo " Fragments originating from contigs other than these are" - echo " identified as 'nuclear fragments' in the ATACseq analysis pipeline." - echo "" - echo " --filtering_off" - echo " type: boolean_true" - echo " By default the input Transcript Annotation files are filtered based on" - echo " the gene_type/gene_biotype attribute. Only features" - echo " having the following attribute values are kept:" - echo " - protein_coding" - echo " - lncRNA (lincRNA and antisense for Gencode < v31/M22/Ensembl97)" - echo " - IG_LV_gene" - echo " - IG_V_gene" - echo " - IG_V_pseudogene" - echo " - IG_D_gene" - echo " - IG_J_gene" - echo " - IG_J_pseudogene" - echo " - IG_C_gene" - echo " - IG_C_pseudogene" - echo " - TR_V_gene" - echo " - TR_V_pseudogene" - echo " - TR_D_gene" - echo " - TR_J_gene" - echo " - TR_J_pseudogene" - echo " - TR_C_gene" - echo " If you have already pre-filtered the input Annotation files and/or" - echo " wish to turn-off the filtering, please set this option to True." - echo "" - echo " --wta_only_index" - echo " type: boolean_true" - echo " Build a WTA only index, otherwise builds a WTA + ATAC index." - echo "" - echo " --extra_star_params" - echo " type: string" - echo " example: --limitGenomeGenerateRAM 48000 --genomeSAindexNbases 11" - echo " Additional parameters to pass to STAR when building the genome index." - echo " Specify exactly like how you would on the command line." -} # initialise variables VIASH_MODE='run' @@ -554,9 +465,9 @@ RUN VERSION=$(ls -v /var/bd_rhapsody_cwl | grep '^v' | sed 's#v##' | tail -1) RUN echo "bdgenomics/rhapsody: \"$VERSION\"" > /var/software_versions.txt LABEL org.opencontainers.image.authors="Robrecht Cannoodt, Weiwei Schultz" LABEL org.opencontainers.image.description="Companion container for running component bd_rhapsody bd_rhapsody_make_reference" -LABEL org.opencontainers.image.created="2024-12-03T10:34:04Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:41Z" LABEL org.opencontainers.image.source="https://bitbucket.org/CRSwDev/cwl/src/master/v2.2.1/Extra_Utilities/" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -671,6 +582,121 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bd_rhapsody_make_reference main" + echo "" + echo "The Reference Files Generator creates an archive containing Genome Index" + echo "and Transcriptome annotation files needed for the BD Rhapsody Sequencing" + echo "Analysis Pipeline. The app takes as input one or more FASTA and GTF files" + echo "and produces a compressed archive in the form of a tar.gz file. The" + echo "archive contains:" + echo "" + echo "- STAR index" + echo "- Filtered GTF file" + echo "" + echo "Inputs:" + echo " --genome_fasta" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example: genome_sequence.fa.gz" + echo " Reference genome file in FASTA or FASTA.GZ format. The BD Rhapsody" + echo " Sequencing Analysis Pipeline uses GRCh38 for Human and GRCm39 for Mouse." + echo "" + echo " --gtf" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example: transcriptome_annotation.gtf.gz" + echo " File path to the transcript annotation files in GTF or GTF.GZ format." + echo " The Sequence Analysis Pipeline requires the 'gene_name' or" + echo " 'gene_id' attribute to be set on each gene and exon feature. Gene and" + echo " exon feature lines must have the same attribute, and exons" + echo " must have a corresponding gene with the same value. For TCR/BCR assays," + echo " the TCR or BCR gene segments must have the 'gene_type' or" + echo " 'gene_biotype' attribute set, and the value should begin with 'TR' or" + echo " 'IG', respectively." + echo "" + echo " --extra_sequences" + echo " type: file, multiple values allowed, file must exist" + echo " File path to additional sequences in FASTA format to use when building" + echo " the STAR index. (e.g. transgenes or CRISPR guide barcodes)." + echo " GTF lines for these sequences will be automatically generated and" + echo " combined with the main GTF." + echo "" + echo "Outputs:" + echo " --reference_archive" + echo " type: file, required parameter, output, file must exist" + echo " example: star_index.tar.gz" + echo " A Compressed archive containing the Reference Genome Index and" + echo " annotation GTF files. This archive is meant to be used as an" + echo " input in the BD Rhapsody Sequencing Analysis Pipeline." + echo "" + echo "Arguments:" + echo " --mitochondrial_contigs" + echo " type: string, multiple values allowed" + echo " default: chrM;chrMT;M;MT" + echo " Names of the Mitochondrial contigs in the provided Reference Genome." + echo " Fragments originating from contigs other than these are" + echo " identified as 'nuclear fragments' in the ATACseq analysis pipeline." + echo "" + echo " --filtering_off" + echo " type: boolean_true" + echo " By default the input Transcript Annotation files are filtered based on" + echo " the gene_type/gene_biotype attribute. Only features" + echo " having the following attribute values are kept:" + echo " - protein_coding" + echo " - lncRNA (lincRNA and antisense for Gencode < v31/M22/Ensembl97)" + echo " - IG_LV_gene" + echo " - IG_V_gene" + echo " - IG_V_pseudogene" + echo " - IG_D_gene" + echo " - IG_J_gene" + echo " - IG_J_pseudogene" + echo " - IG_C_gene" + echo " - IG_C_pseudogene" + echo " - TR_V_gene" + echo " - TR_V_pseudogene" + echo " - TR_D_gene" + echo " - TR_J_gene" + echo " - TR_J_pseudogene" + echo " - TR_C_gene" + echo " If you have already pre-filtered the input Annotation files and/or" + echo " wish to turn-off the filtering, please set this option to True." + echo "" + echo " --wta_only_index" + echo " type: boolean_true" + echo " Build a WTA only index, otherwise builds a WTA + ATAC index." + echo "" + echo " --extra_star_params" + echo " type: string" + echo " example: --limitGenomeGenerateRAM 48000 --genomeSAindexNbases 11" + echo " Additional parameters to pass to STAR when building the genome index." + echo " Specify exactly like how you would on the command line." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis/.config.vsh.yaml b/target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis/.config.vsh.yaml index 009876f7..bb09ea18 100644 --- a/target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis/.config.vsh.yaml +++ b/target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis/.config.vsh.yaml @@ -989,6 +989,9 @@ test_resources: path: "helpers" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -1114,16 +1117,16 @@ build_info: engine: "docker|native" output: "target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis" executable: "target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis/bd_rhapsody_sequence_analysis" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis/bd_rhapsody_sequence_analysis b/target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis/bd_rhapsody_sequence_analysis index f98066ab..7cda8bae 100755 --- a/target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis/bd_rhapsody_sequence_analysis +++ b/target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis/bd_rhapsody_sequence_analysis @@ -2,7 +2,7 @@ # bd_rhapsody_sequence_analysis main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -173,6 +173,416 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM bdgenomics/rhapsody:2.2.1 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps git && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "cwlref-runner" "cwl-runner" + +RUN mkdir /var/bd_rhapsody_cwl && \ + cd /var/bd_rhapsody_cwl && \ + git clone https://bitbucket.org/CRSwDev/cwl.git . && \ + git checkout 8feeace1141b24749ea6003f8e6ad6d3ad5232de + +RUN VERSION=$(ls -v /var/bd_rhapsody_cwl | grep '^v' | sed 's#v##' | tail -1) +RUN echo "bdgenomics/rhapsody: \"$VERSION\"" > /var/software_versions.txt +LABEL org.opencontainers.image.authors="Robrecht Cannoodt, Weiwei Schultz" +LABEL org.opencontainers.image.description="Companion container for running component bd_rhapsody bd_rhapsody_sequence_analysis" +LABEL org.opencontainers.image.created="2025-03-06T09:19:41Z" +LABEL org.opencontainers.image.source="https://bitbucket.org/CRSwDev/cwl/src/master/v2.2.1" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "bd_rhapsody_sequence_analysis main" @@ -661,416 +1071,32 @@ function ViashHelp { echo "" echo " --write_filtered_reads" echo " type: boolean" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? : whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM bdgenomics/rhapsody:2.2.1 -ENTRYPOINT [] -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps git && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "cwlref-runner" "cwl-runner" - -RUN mkdir /var/bd_rhapsody_cwl && \ - cd /var/bd_rhapsody_cwl && \ - git clone https://bitbucket.org/CRSwDev/cwl.git . && \ - git checkout 8feeace1141b24749ea6003f8e6ad6d3ad5232de - -RUN VERSION=$(ls -v /var/bd_rhapsody_cwl | grep '^v' | sed 's#v##' | tail -1) -RUN echo "bdgenomics/rhapsody: \"$VERSION\"" > /var/software_versions.txt -LABEL org.opencontainers.image.authors="Robrecht Cannoodt, Weiwei Schultz" -LABEL org.opencontainers.image.description="Companion container for running component bd_rhapsody bd_rhapsody_sequence_analysis" -LABEL org.opencontainers.image.created="2024-12-03T10:34:04Z" -LABEL org.opencontainers.image.source="https://bitbucket.org/CRSwDev/cwl/src/master/v2.2.1" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bedtools/bedtools_bamtobed/.config.vsh.yaml b/target/executable/bedtools/bedtools_bamtobed/.config.vsh.yaml index 468bb595..88ef0fcc 100644 --- a/target/executable/bedtools/bedtools_bamtobed/.config.vsh.yaml +++ b/target/executable/bedtools/bedtools_bamtobed/.config.vsh.yaml @@ -124,6 +124,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -234,16 +237,16 @@ build_info: engine: "docker|native" output: "target/executable/bedtools/bedtools_bamtobed" executable: "target/executable/bedtools/bedtools_bamtobed/bedtools_bamtobed" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bedtools/bedtools_bamtobed/bedtools_bamtobed b/target/executable/bedtools/bedtools_bamtobed/bedtools_bamtobed index 146ef3fc..9770fb9b 100755 --- a/target/executable/bedtools/bedtools_bamtobed/bedtools_bamtobed +++ b/target/executable/bedtools/bedtools_bamtobed/bedtools_bamtobed @@ -2,7 +2,7 @@ # bedtools_bamtobed main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,72 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bedtools_bamtobed main" - echo "" - echo "Converts BAM alignments to BED6 or BEDPE format." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input BAM file." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output BED file." - echo "" - echo "Options:" - echo " --bedpe" - echo " type: boolean_true" - echo " Write BEDPE format. Requires BAM to be grouped or sorted by query." - echo "" - echo " --mate1" - echo " type: boolean_true" - echo " When writing BEDPE (-bedpe) format, always report mate one as the first" - echo " BEDPE \"block\"." - echo "" - echo " --bed12" - echo " type: boolean_true" - echo " Write \"blocked\" BED format (aka \"BED12\"). Forces -split." - echo " See http://genome-test.cse.ucsc.edu/FAQ/FAQformat#format1" - echo "" - echo " --split" - echo " type: boolean_true" - echo " Report \"split\" BAM alignments as separate BED entries." - echo " Splits only on N CIGAR operations." - echo "" - echo " --splitD" - echo " type: boolean_true" - echo " Split alignments based on N and D CIGAR operators." - echo " Forces -split." - echo "" - echo " -ed, --edit_distance" - echo " type: boolean_true" - echo " Use BAM edit distance (NM tag) for BED score." - echo " - Default for BED is to use mapping quality." - echo " - Default for BEDPE is to use the minimum of" - echo " the two mapping qualities for the pair." - echo " - When -ed is used with -bedpe, the total edit" - echo " distance from the two mates is reported." - echo "" - echo " --tag" - echo " type: string" - echo " example: SM" - echo " Use other NUMERIC BAM alignment tag for BED score." - echo " Default for BED is to use mapping quality. Disallowed with BEDPE output." - echo "" - echo " --color" - echo " type: string" - echo " example: 250,250,250" - echo " An R,G,B string for the color used with BED12 format." - echo " Default is (255,0,0)." - echo "" - echo " --cigar" - echo " type: boolean_true" - echo " Add the CIGAR string to the BED entry as a 7th column." -} # initialise variables VIASH_MODE='run' @@ -522,9 +456,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_bamtobed" -LABEL org.opencontainers.image.created="2024-12-03T10:34:01Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:42Z" LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -639,6 +573,98 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedtools_bamtobed main" + echo "" + echo "Converts BAM alignments to BED6 or BEDPE format." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input BAM file." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " Output BED file." + echo "" + echo "Options:" + echo " --bedpe" + echo " type: boolean_true" + echo " Write BEDPE format. Requires BAM to be grouped or sorted by query." + echo "" + echo " --mate1" + echo " type: boolean_true" + echo " When writing BEDPE (-bedpe) format, always report mate one as the first" + echo " BEDPE \"block\"." + echo "" + echo " --bed12" + echo " type: boolean_true" + echo " Write \"blocked\" BED format (aka \"BED12\"). Forces -split." + echo " See http://genome-test.cse.ucsc.edu/FAQ/FAQformat#format1" + echo "" + echo " --split" + echo " type: boolean_true" + echo " Report \"split\" BAM alignments as separate BED entries." + echo " Splits only on N CIGAR operations." + echo "" + echo " --splitD" + echo " type: boolean_true" + echo " Split alignments based on N and D CIGAR operators." + echo " Forces -split." + echo "" + echo " -ed, --edit_distance" + echo " type: boolean_true" + echo " Use BAM edit distance (NM tag) for BED score." + echo " - Default for BED is to use mapping quality." + echo " - Default for BEDPE is to use the minimum of" + echo " the two mapping qualities for the pair." + echo " - When -ed is used with -bedpe, the total edit" + echo " distance from the two mates is reported." + echo "" + echo " --tag" + echo " type: string" + echo " example: SM" + echo " Use other NUMERIC BAM alignment tag for BED score." + echo " Default for BED is to use mapping quality. Disallowed with BEDPE output." + echo "" + echo " --color" + echo " type: string" + echo " example: 250,250,250" + echo " An R,G,B string for the color used with BED12 format." + echo " Default is (255,0,0)." + echo "" + echo " --cigar" + echo " type: boolean_true" + echo " Add the CIGAR string to the BED entry as a 7th column." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bedtools/bedtools_bamtofastq/.config.vsh.yaml b/target/executable/bedtools/bedtools_bamtofastq/.config.vsh.yaml index dbfede3a..15ae4813 100644 --- a/target/executable/bedtools/bedtools_bamtofastq/.config.vsh.yaml +++ b/target/executable/bedtools/bedtools_bamtofastq/.config.vsh.yaml @@ -78,6 +78,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -186,16 +189,16 @@ build_info: engine: "docker|native" output: "target/executable/bedtools/bedtools_bamtofastq" executable: "target/executable/bedtools/bedtools_bamtofastq/bedtools_bamtofastq" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bedtools/bedtools_bamtofastq/bedtools_bamtofastq b/target/executable/bedtools/bedtools_bamtofastq/bedtools_bamtofastq index c982db9a..5e970cd2 100755 --- a/target/executable/bedtools/bedtools_bamtofastq/bedtools_bamtofastq +++ b/target/executable/bedtools/bedtools_bamtofastq/bedtools_bamtofastq @@ -2,7 +2,7 @@ # bedtools_bamtofastq main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,33 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bedtools_bamtofastq main" - echo "" - echo "Conversion tool for extracting FASTQ records from sequence alignments in BAM" - echo "format." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input BAM file to be converted to FASTQ." - echo "" - echo "Outputs:" - echo " -fq, --fastq" - echo " type: file, required parameter, output, file must exist" - echo " Output FASTQ file." - echo "" - echo " -fq2, --fastq2" - echo " type: file, output, file must exist" - echo " FASTQ for second end. Used if BAM contains paired-end data." - echo " BAM should be sorted by query name is creating paired FASTQ." - echo "" - echo "Options:" - echo " --tags" - echo " type: boolean_true" - echo " Create FASTQ based on the mate info in the BAM R2 and Q2 tags." -} # initialise variables VIASH_MODE='run' @@ -483,9 +456,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_bamtofastq" -LABEL org.opencontainers.image.created="2024-12-03T10:34:02Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:42Z" LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -600,6 +573,59 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedtools_bamtofastq main" + echo "" + echo "Conversion tool for extracting FASTQ records from sequence alignments in BAM" + echo "format." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input BAM file to be converted to FASTQ." + echo "" + echo "Outputs:" + echo " -fq, --fastq" + echo " type: file, required parameter, output, file must exist" + echo " Output FASTQ file." + echo "" + echo " -fq2, --fastq2" + echo " type: file, output, file must exist" + echo " FASTQ for second end. Used if BAM contains paired-end data." + echo " BAM should be sorted by query name is creating paired FASTQ." + echo "" + echo "Options:" + echo " --tags" + echo " type: boolean_true" + echo " Create FASTQ based on the mate info in the BAM R2 and Q2 tags." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bedtools/bedtools_bed12tobed6/.config.vsh.yaml b/target/executable/bedtools/bedtools_bed12tobed6/.config.vsh.yaml index d64b9723..ef96e839 100644 --- a/target/executable/bedtools/bedtools_bed12tobed6/.config.vsh.yaml +++ b/target/executable/bedtools/bedtools_bed12tobed6/.config.vsh.yaml @@ -67,6 +67,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -175,16 +178,16 @@ build_info: engine: "docker|native" output: "target/executable/bedtools/bedtools_bed12tobed6" executable: "target/executable/bedtools/bedtools_bed12tobed6/bedtools_bed12tobed6" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bedtools/bedtools_bed12tobed6/bedtools_bed12tobed6 b/target/executable/bedtools/bedtools_bed12tobed6/bedtools_bed12tobed6 index 4240d74f..a4b878d0 100755 --- a/target/executable/bedtools/bedtools_bed12tobed6/bedtools_bed12tobed6 +++ b/target/executable/bedtools/bedtools_bed12tobed6/bedtools_bed12tobed6 @@ -2,7 +2,7 @@ # bedtools_bed12tobed6 main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,30 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bedtools_bed12tobed6 main" - echo "" - echo "Converts BED features in BED12 (a.k.a. “blocked” BED features such as genes) to" - echo "discrete BED6 features." - echo "For example, in the case of a gene with six exons, bed12ToBed6 would create six" - echo "separate BED6 features (i.e., one for each exon)." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input BED12 file." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " Output BED6 file to be written." - echo "" - echo "Options:" - echo " -n, --n_score" - echo " type: boolean_true" - echo " Force the score to be the (1-based) block number from the BED12." -} # initialise variables VIASH_MODE='run' @@ -480,9 +456,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_bed12tobed6" -LABEL org.opencontainers.image.created="2024-12-03T10:34:03Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:44Z" LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -597,6 +573,56 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedtools_bed12tobed6 main" + echo "" + echo "Converts BED features in BED12 (a.k.a. “blocked” BED features such as genes) to" + echo "discrete BED6 features." + echo "For example, in the case of a gene with six exons, bed12ToBed6 would create six" + echo "separate BED6 features (i.e., one for each exon)." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input BED12 file." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " Output BED6 file to be written." + echo "" + echo "Options:" + echo " -n, --n_score" + echo " type: boolean_true" + echo " Force the score to be the (1-based) block number from the BED12." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bedtools/bedtools_bedtobam/.config.vsh.yaml b/target/executable/bedtools/bedtools_bedtobam/.config.vsh.yaml index 13ea77e9..5ef81590 100644 --- a/target/executable/bedtools/bedtools_bedtobam/.config.vsh.yaml +++ b/target/executable/bedtools/bedtools_bedtobam/.config.vsh.yaml @@ -98,6 +98,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -213,16 +216,16 @@ build_info: engine: "docker|native" output: "target/executable/bedtools/bedtools_bedtobam" executable: "target/executable/bedtools/bedtools_bedtobam/bedtools_bedtobam" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bedtools/bedtools_bedtobam/bedtools_bedtobam b/target/executable/bedtools/bedtools_bedtobam/bedtools_bedtobam index e908f0bf..c550c110 100755 --- a/target/executable/bedtools/bedtools_bedtobam/bedtools_bedtobam +++ b/target/executable/bedtools/bedtools_bedtobam/bedtools_bedtobam @@ -2,7 +2,7 @@ # bedtools_bedtobam main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,46 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bedtools_bedtobam main" - echo "" - echo "Converts feature records (bed/gff/vcf) to BAM format." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input file (bed/gff/vcf)." - echo "" - echo " -g, --genome" - echo " type: file, required parameter, file must exist" - echo " Input genome file." - echo " NOTE: This is not a fasta file. This is a two-column tab-delimited file" - echo " where the first column is the chromosome name and the second their" - echo " sizes." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " Output BAM file to be written." - echo "" - echo "Options:" - echo " -mapq, --map_quality" - echo " type: integer" - echo " default: 255" - echo " min: 0" - echo " max: 255" - echo " Set the mappinq quality for the BAM records." - echo "" - echo " --bed12" - echo " type: boolean_true" - echo " The BED file is in BED12 format. The BAM CIGAR" - echo " string will reflect BED \"blocks\"." - echo "" - echo " -ubam, --uncompress_bam" - echo " type: boolean_true" - echo " Write uncompressed BAM output. Default writes compressed BAM." -} # initialise variables VIASH_MODE='run' @@ -496,9 +456,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_bedtobam" -LABEL org.opencontainers.image.created="2024-12-03T10:34:04Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:43Z" LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -613,6 +573,72 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedtools_bedtobam main" + echo "" + echo "Converts feature records (bed/gff/vcf) to BAM format." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input file (bed/gff/vcf)." + echo "" + echo " -g, --genome" + echo " type: file, required parameter, file must exist" + echo " Input genome file." + echo " NOTE: This is not a fasta file. This is a two-column tab-delimited file" + echo " where the first column is the chromosome name and the second their" + echo " sizes." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " Output BAM file to be written." + echo "" + echo "Options:" + echo " -mapq, --map_quality" + echo " type: integer" + echo " default: 255" + echo " min: 0" + echo " max: 255" + echo " Set the mappinq quality for the BAM records." + echo "" + echo " --bed12" + echo " type: boolean_true" + echo " The BED file is in BED12 format. The BAM CIGAR" + echo " string will reflect BED \"blocks\"." + echo "" + echo " -ubam, --uncompress_bam" + echo " type: boolean_true" + echo " Write uncompressed BAM output. Default writes compressed BAM." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bedtools/bedtools_genomecov/.config.vsh.yaml b/target/executable/bedtools/bedtools_genomecov/.config.vsh.yaml index bad39651..10960ab1 100644 --- a/target/executable/bedtools/bedtools_genomecov/.config.vsh.yaml +++ b/target/executable/bedtools/bedtools_genomecov/.config.vsh.yaml @@ -226,6 +226,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -336,16 +339,16 @@ build_info: engine: "docker|native" output: "target/executable/bedtools/bedtools_genomecov" executable: "target/executable/bedtools/bedtools_genomecov/bedtools_genomecov" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bedtools/bedtools_genomecov/bedtools_genomecov b/target/executable/bedtools/bedtools_genomecov/bedtools_genomecov index 8a28fb4f..6c91a29b 100755 --- a/target/executable/bedtools/bedtools_genomecov/bedtools_genomecov +++ b/target/executable/bedtools/bedtools_genomecov/bedtools_genomecov @@ -2,7 +2,7 @@ # bedtools_genomecov main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,141 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bedtools_genomecov main" - echo "" - echo "Compute the coverage of a feature file among a genome." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, file must exist" - echo " example: input.bed" - echo " The input file (BED/GFF/VCF) to be used." - echo "" - echo " -ibam, --input_bam" - echo " type: file, file must exist" - echo " The input file is in BAM format." - echo " Note: BAM _must_ be sorted by positions." - echo " '--genome' option is ignored if you use '--input_bam' option!" - echo "" - echo " -g, --genome" - echo " type: file, file must exist" - echo " example: genome.txt" - echo " The genome file to be used." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.bed" - echo " The output BED file." - echo "" - echo "Options:" - echo " -d, --depth" - echo " type: boolean_true" - echo " Report the depth at each genome position (with one-based coordinates)." - echo " Default behavior is to report a histogram." - echo "" - echo " -dz, --depth_zero" - echo " type: boolean_true" - echo " Report the depth at each genome position (with zero-based coordinates)." - echo " Reports only non-zero positions." - echo " Default behavior is to report a histogram." - echo "" - echo " -bg, --bed_graph" - echo " type: boolean_true" - echo " Report depth in BedGraph format. For details, see:" - echo " genome.ucsc.edu/goldenPath/help/bedgraph.html" - echo "" - echo " -bga, --bed_graph_zero_coverage" - echo " type: boolean_true" - echo " Report depth in BedGraph format, as above (-bg)." - echo " However with this option, regions with zero" - echo " coverage are also reported. This allows one to" - echo " quickly extract all regions of a genome with 0" - echo " coverage by applying: \"grep -w 0\$\" to the output." - echo "" - echo " --split" - echo " type: boolean_true" - echo " Treat \"split\" BAM or BED12 entries as distinct BED intervals." - echo " when computing coverage." - echo " For BAM files, this uses the CIGAR \"N\" and \"D\" operations" - echo " to infer the blocks for computing coverage." - echo " For BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds" - echo " fields (i.e., columns 10,11,12)." - echo "" - echo " -ignoreD, --ignore_deletion" - echo " type: boolean_true" - echo " Ignore local deletions (CIGAR \"D\" operations) in BAM entries" - echo " when computing coverage." - echo "" - echo " --strand" - echo " type: string" - echo " choices: [ +, - ]" - echo " Calculate coverage of intervals from a specific strand." - echo " With BED files, requires at least 6 columns (strand is column 6)." - echo "" - echo " -pc, --pair_end_coverage" - echo " type: boolean_true" - echo " Calculate coverage of pair-end fragments." - echo " Works for BAM files only" - echo "" - echo " -fs, --fragment_size" - echo " type: boolean_true" - echo " Force to use provided fragment size instead of read length" - echo " Works for BAM files only" - echo "" - echo " --du" - echo " type: boolean_true" - echo " Change strand af the mate read (so both reads from the same strand)" - echo " useful for strand specific" - echo " Works for BAM files only" - echo "" - echo " -5, --five_prime" - echo " type: boolean_true" - echo " Calculate coverage of 5\" positions (instead of entire interval)." - echo "" - echo " -3, --three_prime" - echo " type: boolean_true" - echo " Calculate coverage of 3\" positions (instead of entire interval)." - echo "" - echo " --max" - echo " type: integer" - echo " min: 0" - echo " Combine all positions with a depth >= max into" - echo " a single bin in the histogram. Irrelevant" - echo " for -d and -bedGraph" - echo " - (INTEGER)" - echo "" - echo " --scale" - echo " type: double" - echo " min: 0.0" - echo " Scale the coverage by a constant factor." - echo " Each coverage value is multiplied by this factor before being reported." - echo " Useful for normalizing coverage by, e.g., reads per million (RPM)." - echo " - Default is 1.0; i.e., unscaled." - echo " - (FLOAT)" - echo "" - echo " --trackline" - echo " type: boolean_true" - echo " Adds a UCSC/Genome-Browser track line definition in the first line of" - echo " the output." - echo " - See here for more details about track line definition:" - echo " http://genome.ucsc.edu/goldenPath/help/bedgraph.html" - echo " - NOTE: When adding a trackline definition, the output BedGraph can be" - echo " easily" - echo " uploaded to the Genome Browser as a custom track," - echo " BUT CAN NOT be converted into a BigWig file (w/o removing the" - echo " first line)." - echo "" - echo " --trackopts" - echo " type: string, multiple values allowed" - echo " Writes additional track line definition parameters in the first line." - echo " - Example:" - echo " -trackopts 'name=\"My Track\" visibility=2 color=255,30,30'" - echo " Note the use of single-quotes if you have spaces in your parameters." - echo " - (TEXT)" -} # initialise variables VIASH_MODE='run' @@ -591,9 +456,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_genomecov" -LABEL org.opencontainers.image.created="2024-12-03T10:34:04Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:42Z" LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -708,6 +573,167 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedtools_genomecov main" + echo "" + echo "Compute the coverage of a feature file among a genome." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, file must exist" + echo " example: input.bed" + echo " The input file (BED/GFF/VCF) to be used." + echo "" + echo " -ibam, --input_bam" + echo " type: file, file must exist" + echo " The input file is in BAM format." + echo " Note: BAM _must_ be sorted by positions." + echo " '--genome' option is ignored if you use '--input_bam' option!" + echo "" + echo " -g, --genome" + echo " type: file, file must exist" + echo " example: genome.txt" + echo " The genome file to be used." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.bed" + echo " The output BED file." + echo "" + echo "Options:" + echo " -d, --depth" + echo " type: boolean_true" + echo " Report the depth at each genome position (with one-based coordinates)." + echo " Default behavior is to report a histogram." + echo "" + echo " -dz, --depth_zero" + echo " type: boolean_true" + echo " Report the depth at each genome position (with zero-based coordinates)." + echo " Reports only non-zero positions." + echo " Default behavior is to report a histogram." + echo "" + echo " -bg, --bed_graph" + echo " type: boolean_true" + echo " Report depth in BedGraph format. For details, see:" + echo " genome.ucsc.edu/goldenPath/help/bedgraph.html" + echo "" + echo " -bga, --bed_graph_zero_coverage" + echo " type: boolean_true" + echo " Report depth in BedGraph format, as above (-bg)." + echo " However with this option, regions with zero" + echo " coverage are also reported. This allows one to" + echo " quickly extract all regions of a genome with 0" + echo " coverage by applying: \"grep -w 0\$\" to the output." + echo "" + echo " --split" + echo " type: boolean_true" + echo " Treat \"split\" BAM or BED12 entries as distinct BED intervals." + echo " when computing coverage." + echo " For BAM files, this uses the CIGAR \"N\" and \"D\" operations" + echo " to infer the blocks for computing coverage." + echo " For BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds" + echo " fields (i.e., columns 10,11,12)." + echo "" + echo " -ignoreD, --ignore_deletion" + echo " type: boolean_true" + echo " Ignore local deletions (CIGAR \"D\" operations) in BAM entries" + echo " when computing coverage." + echo "" + echo " --strand" + echo " type: string" + echo " choices: [ +, - ]" + echo " Calculate coverage of intervals from a specific strand." + echo " With BED files, requires at least 6 columns (strand is column 6)." + echo "" + echo " -pc, --pair_end_coverage" + echo " type: boolean_true" + echo " Calculate coverage of pair-end fragments." + echo " Works for BAM files only" + echo "" + echo " -fs, --fragment_size" + echo " type: boolean_true" + echo " Force to use provided fragment size instead of read length" + echo " Works for BAM files only" + echo "" + echo " --du" + echo " type: boolean_true" + echo " Change strand af the mate read (so both reads from the same strand)" + echo " useful for strand specific" + echo " Works for BAM files only" + echo "" + echo " -5, --five_prime" + echo " type: boolean_true" + echo " Calculate coverage of 5\" positions (instead of entire interval)." + echo "" + echo " -3, --three_prime" + echo " type: boolean_true" + echo " Calculate coverage of 3\" positions (instead of entire interval)." + echo "" + echo " --max" + echo " type: integer" + echo " min: 0" + echo " Combine all positions with a depth >= max into" + echo " a single bin in the histogram. Irrelevant" + echo " for -d and -bedGraph" + echo " - (INTEGER)" + echo "" + echo " --scale" + echo " type: double" + echo " min: 0.0" + echo " Scale the coverage by a constant factor." + echo " Each coverage value is multiplied by this factor before being reported." + echo " Useful for normalizing coverage by, e.g., reads per million (RPM)." + echo " - Default is 1.0; i.e., unscaled." + echo " - (FLOAT)" + echo "" + echo " --trackline" + echo " type: boolean_true" + echo " Adds a UCSC/Genome-Browser track line definition in the first line of" + echo " the output." + echo " - See here for more details about track line definition:" + echo " http://genome.ucsc.edu/goldenPath/help/bedgraph.html" + echo " - NOTE: When adding a trackline definition, the output BedGraph can be" + echo " easily" + echo " uploaded to the Genome Browser as a custom track," + echo " BUT CAN NOT be converted into a BigWig file (w/o removing the" + echo " first line)." + echo "" + echo " --trackopts" + echo " type: string, multiple values allowed" + echo " Writes additional track line definition parameters in the first line." + echo " - Example:" + echo " -trackopts 'name=\"My Track\" visibility=2 color=255,30,30'" + echo " Note the use of single-quotes if you have spaces in your parameters." + echo " - (TEXT)" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bedtools/bedtools_getfasta/.config.vsh.yaml b/target/executable/bedtools/bedtools_getfasta/.config.vsh.yaml index 6fc44a04..9998ae82 100644 --- a/target/executable/bedtools/bedtools_getfasta/.config.vsh.yaml +++ b/target/executable/bedtools/bedtools_getfasta/.config.vsh.yaml @@ -123,6 +123,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -231,16 +234,16 @@ build_info: engine: "docker|native" output: "target/executable/bedtools/bedtools_getfasta" executable: "target/executable/bedtools/bedtools_getfasta/bedtools_getfasta" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bedtools/bedtools_getfasta/bedtools_getfasta b/target/executable/bedtools/bedtools_getfasta/bedtools_getfasta index 8211df9a..c4677746 100755 --- a/target/executable/bedtools/bedtools_getfasta/bedtools_getfasta +++ b/target/executable/bedtools/bedtools_getfasta/bedtools_getfasta @@ -2,7 +2,7 @@ # bedtools_getfasta main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,76 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bedtools_getfasta main" - echo "" - echo "Extract sequences from a FASTA file for each of the intervals defined in a" - echo "BED/GFF/VCF file." - echo "" - echo "Input arguments:" - echo " --input_fasta" - echo " type: file, file must exist" - echo " FASTA file containing sequences for each interval specified in the input" - echo " BED file." - echo " The headers in the input FASTA file must exactly match the chromosome" - echo " column in the BED file." - echo "" - echo " --input_bed" - echo " type: file, file must exist" - echo " BED file containing intervals to extract from the FASTA file." - echo " BED files containing a single region require a newline character" - echo " at the end of the line, otherwise a blank output file is produced." - echo "" - echo " --rna" - echo " type: boolean_true" - echo " The FASTA is RNA not DNA. Reverse complementation handled accordingly." - echo "" - echo "Run arguments:" - echo " -s, --strandedness" - echo " type: boolean_true" - echo " Force strandedness. If the feature occupies the antisense strand, the" - echo " output sequence will" - echo " be reverse complemented. By default strandedness is not taken into" - echo " account." - echo "" - echo "Output arguments:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output file where the output from the 'bedtools getfasta' commend will" - echo " be written to." - echo "" - echo " --tab" - echo " type: boolean_true" - echo " Report extract sequences in a tab-delimited format instead of in FASTA" - echo " format." - echo "" - echo " --bed_out" - echo " type: boolean_true" - echo " Report extract sequences in a tab-delimited BED format instead of in" - echo " FASTA format." - echo "" - echo " --name" - echo " type: boolean_true" - echo " Set the FASTA header for each extracted sequence to be the \"name\" and" - echo " coordinate columns from the BED feature." - echo "" - echo " --name_only" - echo " type: boolean_true" - echo " Set the FASTA header for each extracted sequence to be the \"name\"" - echo " columns from the BED feature." - echo "" - echo " --split" - echo " type: boolean_true" - echo " When --input is in BED12 format, create a separate fasta entry for each" - echo " block in a BED12 record," - echo " blocks being described in the 11th and 12th column of the BED." - echo "" - echo " --full_header" - echo " type: boolean_true" - echo " Use full fasta header. By default, only the word before the first space" - echo " or tab is used." -} # initialise variables VIASH_MODE='run' @@ -526,9 +456,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var LABEL org.opencontainers.image.authors="Dries Schaumont" LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_getfasta" -LABEL org.opencontainers.image.created="2024-12-03T10:34:02Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:43Z" LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -643,6 +573,102 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedtools_getfasta main" + echo "" + echo "Extract sequences from a FASTA file for each of the intervals defined in a" + echo "BED/GFF/VCF file." + echo "" + echo "Input arguments:" + echo " --input_fasta" + echo " type: file, file must exist" + echo " FASTA file containing sequences for each interval specified in the input" + echo " BED file." + echo " The headers in the input FASTA file must exactly match the chromosome" + echo " column in the BED file." + echo "" + echo " --input_bed" + echo " type: file, file must exist" + echo " BED file containing intervals to extract from the FASTA file." + echo " BED files containing a single region require a newline character" + echo " at the end of the line, otherwise a blank output file is produced." + echo "" + echo " --rna" + echo " type: boolean_true" + echo " The FASTA is RNA not DNA. Reverse complementation handled accordingly." + echo "" + echo "Run arguments:" + echo " -s, --strandedness" + echo " type: boolean_true" + echo " Force strandedness. If the feature occupies the antisense strand, the" + echo " output sequence will" + echo " be reverse complemented. By default strandedness is not taken into" + echo " account." + echo "" + echo "Output arguments:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " Output file where the output from the 'bedtools getfasta' commend will" + echo " be written to." + echo "" + echo " --tab" + echo " type: boolean_true" + echo " Report extract sequences in a tab-delimited format instead of in FASTA" + echo " format." + echo "" + echo " --bed_out" + echo " type: boolean_true" + echo " Report extract sequences in a tab-delimited BED format instead of in" + echo " FASTA format." + echo "" + echo " --name" + echo " type: boolean_true" + echo " Set the FASTA header for each extracted sequence to be the \"name\" and" + echo " coordinate columns from the BED feature." + echo "" + echo " --name_only" + echo " type: boolean_true" + echo " Set the FASTA header for each extracted sequence to be the \"name\"" + echo " columns from the BED feature." + echo "" + echo " --split" + echo " type: boolean_true" + echo " When --input is in BED12 format, create a separate fasta entry for each" + echo " block in a BED12 record," + echo " blocks being described in the 11th and 12th column of the BED." + echo "" + echo " --full_header" + echo " type: boolean_true" + echo " Use full fasta header. By default, only the word before the first space" + echo " or tab is used." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bedtools/bedtools_groupby/.config.vsh.yaml b/target/executable/bedtools/bedtools_groupby/.config.vsh.yaml index b1f46153..24183173 100644 --- a/target/executable/bedtools/bedtools_groupby/.config.vsh.yaml +++ b/target/executable/bedtools/bedtools_groupby/.config.vsh.yaml @@ -165,6 +165,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -272,16 +275,16 @@ build_info: engine: "docker|native" output: "target/executable/bedtools/bedtools_groupby" executable: "target/executable/bedtools/bedtools_groupby/bedtools_groupby" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bedtools/bedtools_groupby/bedtools_groupby b/target/executable/bedtools/bedtools_groupby/bedtools_groupby index 4ce69b09..d367f977 100755 --- a/target/executable/bedtools/bedtools_groupby/bedtools_groupby +++ b/target/executable/bedtools/bedtools_groupby/bedtools_groupby @@ -2,7 +2,7 @@ # bedtools_groupby main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,102 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bedtools_groupby main" - echo "" - echo "Summarizes a dataset column based upon common column groupings." - echo "Akin to the SQL \"group by\" command." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input_a.bed" - echo " The input BED file to be used." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.bed" - echo " The output groupby BED file." - echo "" - echo "Options:" - echo " -g, -grp, --groupby" - echo " type: string, required parameter" - echo " Specify the columns (1-based) for the grouping." - echo " The columns must be comma separated." - echo " - Default: 1,2,3" - echo "" - echo " -c, -opCols, --column" - echo " type: integer, required parameter" - echo " Specify the column (1-based) that should be summarized." - echo "" - echo " -o, -ops, --operation" - echo " type: string" - echo " Specify the operation that should be applied to opCol." - echo " Valid operations:" - echo " sum, count, count_distinct, min, max," - echo " mean, median, mode, antimode," - echo " stdev, sstdev (sample standard dev.)," - echo " collapse (i.e., print a comma separated list (duplicates allowed))," - echo " distinct (i.e., print a comma separated list (NO duplicates" - echo " allowed))," - echo " distinct_sort_num (as distinct, but sorted numerically, ascending)," - echo " distinct_sort_num_desc (as distinct, but sorted numerically," - echo " descending)," - echo " concat (i.e., merge values into a single, non-delimited string)," - echo " freqdesc (i.e., print desc. list of values:freq)" - echo " freqasc (i.e., print asc. list of values:freq)" - echo " first (i.e., print first value)" - echo " last (i.e., print last value)" - echo " Default value: sum" - echo " If there is only column, but multiple operations, all operations will be" - echo " applied on that column. Likewise, if there is only one operation, but" - echo " multiple columns, that operation will be applied to all columns." - echo " Otherwise, the number of columns must match the the number of" - echo " operations," - echo " and will be applied in respective order." - echo " E.g., \"-c 5,4,6 -o sum,mean,count\" will give the sum of column 5," - echo " the mean of column 4, and the count of column 6." - echo " The order of output columns will match the ordering given in the" - echo " command." - echo "" - echo " --full" - echo " type: boolean_true" - echo " Print all columns from input file. The first line in the group is used." - echo " Default: print only grouped columns." - echo "" - echo " --inheader" - echo " type: boolean_true" - echo " Input file has a header line - the first line will be ignored." - echo "" - echo " --outheader" - echo " type: boolean_true" - echo " Print header line in the output, detailing the column names." - echo " If the input file has headers (-inheader), the output file" - echo " will use the input's column names." - echo " If the input file has no headers, the output file" - echo " will use \"col_1\", \"col_2\", etc. as the column names." - echo "" - echo " --header" - echo " type: boolean_true" - echo " same as '-inheader -outheader'." - echo "" - echo " --ignorecase" - echo " type: boolean_true" - echo " Group values regardless of upper/lower case." - echo "" - echo " -prec, --precision" - echo " type: integer" - echo " default: 5" - echo " Sets the decimal precision for output." - echo "" - echo " -delim, --delimiter" - echo " type: string" - echo " default: ," - echo " example: |" - echo " Specify a custom delimiter for the collapse operations." -} # initialise variables VIASH_MODE='run' @@ -552,9 +456,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_groupby" -LABEL org.opencontainers.image.created="2024-12-03T10:34:02Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:44Z" LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -669,6 +573,128 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedtools_groupby main" + echo "" + echo "Summarizes a dataset column based upon common column groupings." + echo "Akin to the SQL \"group by\" command." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input_a.bed" + echo " The input BED file to be used." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.bed" + echo " The output groupby BED file." + echo "" + echo "Options:" + echo " -g, -grp, --groupby" + echo " type: string, required parameter" + echo " Specify the columns (1-based) for the grouping." + echo " The columns must be comma separated." + echo " - Default: 1,2,3" + echo "" + echo " -c, -opCols, --column" + echo " type: integer, required parameter" + echo " Specify the column (1-based) that should be summarized." + echo "" + echo " -o, -ops, --operation" + echo " type: string" + echo " Specify the operation that should be applied to opCol." + echo " Valid operations:" + echo " sum, count, count_distinct, min, max," + echo " mean, median, mode, antimode," + echo " stdev, sstdev (sample standard dev.)," + echo " collapse (i.e., print a comma separated list (duplicates allowed))," + echo " distinct (i.e., print a comma separated list (NO duplicates" + echo " allowed))," + echo " distinct_sort_num (as distinct, but sorted numerically, ascending)," + echo " distinct_sort_num_desc (as distinct, but sorted numerically," + echo " descending)," + echo " concat (i.e., merge values into a single, non-delimited string)," + echo " freqdesc (i.e., print desc. list of values:freq)" + echo " freqasc (i.e., print asc. list of values:freq)" + echo " first (i.e., print first value)" + echo " last (i.e., print last value)" + echo " Default value: sum" + echo " If there is only column, but multiple operations, all operations will be" + echo " applied on that column. Likewise, if there is only one operation, but" + echo " multiple columns, that operation will be applied to all columns." + echo " Otherwise, the number of columns must match the the number of" + echo " operations," + echo " and will be applied in respective order." + echo " E.g., \"-c 5,4,6 -o sum,mean,count\" will give the sum of column 5," + echo " the mean of column 4, and the count of column 6." + echo " The order of output columns will match the ordering given in the" + echo " command." + echo "" + echo " --full" + echo " type: boolean_true" + echo " Print all columns from input file. The first line in the group is used." + echo " Default: print only grouped columns." + echo "" + echo " --inheader" + echo " type: boolean_true" + echo " Input file has a header line - the first line will be ignored." + echo "" + echo " --outheader" + echo " type: boolean_true" + echo " Print header line in the output, detailing the column names." + echo " If the input file has headers (-inheader), the output file" + echo " will use the input's column names." + echo " If the input file has no headers, the output file" + echo " will use \"col_1\", \"col_2\", etc. as the column names." + echo "" + echo " --header" + echo " type: boolean_true" + echo " same as '-inheader -outheader'." + echo "" + echo " --ignorecase" + echo " type: boolean_true" + echo " Group values regardless of upper/lower case." + echo "" + echo " -prec, --precision" + echo " type: integer" + echo " default: 5" + echo " Sets the decimal precision for output." + echo "" + echo " -delim, --delimiter" + echo " type: string" + echo " default: ," + echo " example: |" + echo " Specify a custom delimiter for the collapse operations." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bedtools/bedtools_intersect/.config.vsh.yaml b/target/executable/bedtools/bedtools_intersect/.config.vsh.yaml index f2a79977..132c6869 100644 --- a/target/executable/bedtools/bedtools_intersect/.config.vsh.yaml +++ b/target/executable/bedtools/bedtools_intersect/.config.vsh.yaml @@ -301,6 +301,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -409,16 +412,16 @@ build_info: engine: "docker|native" output: "target/executable/bedtools/bedtools_intersect" executable: "target/executable/bedtools/bedtools_intersect/bedtools_intersect" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bedtools/bedtools_intersect/bedtools_intersect b/target/executable/bedtools/bedtools_intersect/bedtools_intersect index 8ae4cfc4..b116bd59 100755 --- a/target/executable/bedtools/bedtools_intersect/bedtools_intersect +++ b/target/executable/bedtools/bedtools_intersect/bedtools_intersect @@ -2,7 +2,7 @@ # bedtools_intersect main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,183 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bedtools_intersect main" - echo "" - echo "bedtools intersect allows one to screen for overlaps between two sets of genomic" - echo "features." - echo "Moreover, it allows one to have fine control as to how the intersections are" - echo "reported." - echo "bedtools intersect works with both BED/GFF/VCF and BAM files as input." - echo "" - echo "Inputs:" - echo " -a, --input_a" - echo " type: file, required parameter, file must exist" - echo " example: input_a.bed" - echo " The input file (BED/GFF/VCF/BAM) to be used as the -a file." - echo "" - echo " -b, --input_b" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: input_b.bed" - echo " The input file(s) (BED/GFF/VCF/BAM) to be used as the -b file(s)." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.bed" - echo " The output BED file." - echo "" - echo "Options:" - echo " -wa, --write_a" - echo " type: boolean_true" - echo " Write the original A entry for each overlap." - echo "" - echo " -wb, --write_b" - echo " type: boolean_true" - echo " Write the original B entry for each overlap." - echo " Useful for knowing _what_ A overlaps. Restricted by -f and -r." - echo "" - echo " -loj, --left_outer_join" - echo " type: boolean_true" - echo " Perform a \"left outer join\". That is, for each feature in A report each" - echo " overlap with B." - echo " If no overlaps are found, report a NULL feature for B." - echo "" - echo " -wo, --write_overlap" - echo " type: boolean_true" - echo " Write the original A and B entries plus the number of base pairs of" - echo " overlap between the two features." - echo " - Overlaps restricted by -f and -r." - echo " Only A features with overlap are reported." - echo "" - echo " -wao, --write_overlap_plus" - echo " type: boolean_true" - echo " Write the original A and B entries plus the number of base pairs of" - echo " overlap between the two features." - echo " - Overlaps restricted by -f and -r." - echo " However, A features w/o overlap are also reported with a NULL B" - echo " feature and overlap = 0." - echo "" - echo " -u, --report_A_if_no_overlap" - echo " type: boolean_true" - echo " Write the original A entry _if_ no overlap is found." - echo " - In other words, just report the fact >=1 hit was found." - echo " - Overlaps restricted by -f and -r." - echo "" - echo " -c, --number_of_overlaps_A" - echo " type: boolean_true" - echo " For each entry in A, report the number of overlaps with B." - echo " - Reports 0 for A entries that have no overlap with B." - echo " - Overlaps restricted by -f and -r." - echo "" - echo " -v, --report_no_overlaps_A" - echo " type: boolean_true" - echo " Only report those entries in A that have _no overlaps_ with B." - echo " - Similar to \"grep -v\" (an homage)." - echo "" - echo " -ubam, --uncompressed_bam" - echo " type: boolean_true" - echo " Write uncompressed BAM output. Default writes compressed BAM." - echo "" - echo " -s, --same_strand" - echo " type: boolean_true" - echo " Require same strandedness. That is, only report hits in B." - echo " that overlap A on the _same_ strand." - echo " - By default, overlaps are reported without respect to strand." - echo "" - echo " -S, --opposite_strand" - echo " type: boolean_true" - echo " Require different strandedness. That is, only report hits in B" - echo " that overlap A on the _opposite_ strand." - echo " - By default, overlaps are reported without respect to strand." - echo "" - echo " -f, --min_overlap_A" - echo " type: double" - echo " example: 0.5" - echo " Minimum overlap required as a fraction of A." - echo " - Default is 1E-9 (i.e., 1bp)." - echo " - FLOAT (e.g. 0.50)" - echo "" - echo " -F, --min_overlap_B" - echo " type: double" - echo " example: 0.5" - echo " Minimum overlap required as a fraction of B." - echo " - Default is 1E-9 (i.e., 1bp)." - echo " - FLOAT (e.g. 0.50)" - echo "" - echo " -r, --reciprocal_overlap" - echo " type: boolean_true" - echo " Require that the fraction overlap be reciprocal for A AND B." - echo " - In other words, if -f is 0.90 and -r is used, this requires" - echo " that B overlap 90% of A and A _also_ overlaps 90% of B." - echo "" - echo " -e, --either_overlap" - echo " type: boolean_true" - echo " Require that the minimum fraction be satisfied for A OR B." - echo " - In other words, if -e is used with -f 0.90 and -F 0.10 this requires" - echo " that either 90% of A is covered OR 10% of B is covered." - echo " Without -e, both fractions would have to be satisfied." - echo "" - echo " --split" - echo " type: boolean_true" - echo " Treat \"split\" BAM or BED12 entries as distinct BED intervals." - echo "" - echo " -g, --genome" - echo " type: file, file must exist" - echo " example: genome.txt" - echo " Provide a genome file to enforce consistent chromosome" - echo " sort order across input files. Only applies when used" - echo " with -sorted option." - echo "" - echo " --nonamecheck" - echo " type: boolean_true" - echo " For sorted data, don't throw an error if the file" - echo " has different naming conventions for the same chromosome" - echo " (e.g., \"chr1\" vs \"chr01\")." - echo "" - echo " --sorted" - echo " type: boolean_true" - echo " Use the \"chromsweep\" algorithm for sorted (-k1,1 -k2,2n) input." - echo "" - echo " --names" - echo " type: string" - echo " When using multiple databases, provide an alias" - echo " for each that will appear instead of a fileId when" - echo " also printing the DB record." - echo "" - echo " --filenames" - echo " type: boolean_true" - echo " When using multiple databases, show each complete filename instead of a" - echo " fileId when also printing the DB record." - echo "" - echo " --sortout" - echo " type: boolean_true" - echo " When using multiple databases, sort the output DB hits for each record." - echo "" - echo " --bed" - echo " type: boolean_true" - echo " If using BAM input, write output as BED." - echo "" - echo " --header" - echo " type: boolean_true" - echo " Print the header from the A file prior to results." - echo "" - echo " --nobuf, --no_buffer_output" - echo " type: boolean_true" - echo " Disable buffered output. Using this option will cause each line" - echo " of output to be printed as it is generated, rather than saved" - echo " in a buffer. This will make printing large output files" - echo " noticeably slower, but can be useful in conjunction with" - echo " other software tools and scripts that need to process one" - echo " line of bedtools output at a time." - echo "" - echo " --iobuf, --io_buffer_size" - echo " type: integer" - echo " Specify amount of memory to use for input buffer." - echo " Takes an integer argument. Optional suffixes K/M/G supported." - echo " Note: currently has no effect with compressed files." -} # initialise variables VIASH_MODE='run' @@ -633,9 +456,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_intersect" -LABEL org.opencontainers.image.created="2024-12-03T10:34:03Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:41Z" LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -750,6 +573,209 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedtools_intersect main" + echo "" + echo "bedtools intersect allows one to screen for overlaps between two sets of genomic" + echo "features." + echo "Moreover, it allows one to have fine control as to how the intersections are" + echo "reported." + echo "bedtools intersect works with both BED/GFF/VCF and BAM files as input." + echo "" + echo "Inputs:" + echo " -a, --input_a" + echo " type: file, required parameter, file must exist" + echo " example: input_a.bed" + echo " The input file (BED/GFF/VCF/BAM) to be used as the -a file." + echo "" + echo " -b, --input_b" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example: input_b.bed" + echo " The input file(s) (BED/GFF/VCF/BAM) to be used as the -b file(s)." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.bed" + echo " The output BED file." + echo "" + echo "Options:" + echo " -wa, --write_a" + echo " type: boolean_true" + echo " Write the original A entry for each overlap." + echo "" + echo " -wb, --write_b" + echo " type: boolean_true" + echo " Write the original B entry for each overlap." + echo " Useful for knowing _what_ A overlaps. Restricted by -f and -r." + echo "" + echo " -loj, --left_outer_join" + echo " type: boolean_true" + echo " Perform a \"left outer join\". That is, for each feature in A report each" + echo " overlap with B." + echo " If no overlaps are found, report a NULL feature for B." + echo "" + echo " -wo, --write_overlap" + echo " type: boolean_true" + echo " Write the original A and B entries plus the number of base pairs of" + echo " overlap between the two features." + echo " - Overlaps restricted by -f and -r." + echo " Only A features with overlap are reported." + echo "" + echo " -wao, --write_overlap_plus" + echo " type: boolean_true" + echo " Write the original A and B entries plus the number of base pairs of" + echo " overlap between the two features." + echo " - Overlaps restricted by -f and -r." + echo " However, A features w/o overlap are also reported with a NULL B" + echo " feature and overlap = 0." + echo "" + echo " -u, --report_A_if_no_overlap" + echo " type: boolean_true" + echo " Write the original A entry _if_ no overlap is found." + echo " - In other words, just report the fact >=1 hit was found." + echo " - Overlaps restricted by -f and -r." + echo "" + echo " -c, --number_of_overlaps_A" + echo " type: boolean_true" + echo " For each entry in A, report the number of overlaps with B." + echo " - Reports 0 for A entries that have no overlap with B." + echo " - Overlaps restricted by -f and -r." + echo "" + echo " -v, --report_no_overlaps_A" + echo " type: boolean_true" + echo " Only report those entries in A that have _no overlaps_ with B." + echo " - Similar to \"grep -v\" (an homage)." + echo "" + echo " -ubam, --uncompressed_bam" + echo " type: boolean_true" + echo " Write uncompressed BAM output. Default writes compressed BAM." + echo "" + echo " -s, --same_strand" + echo " type: boolean_true" + echo " Require same strandedness. That is, only report hits in B." + echo " that overlap A on the _same_ strand." + echo " - By default, overlaps are reported without respect to strand." + echo "" + echo " -S, --opposite_strand" + echo " type: boolean_true" + echo " Require different strandedness. That is, only report hits in B" + echo " that overlap A on the _opposite_ strand." + echo " - By default, overlaps are reported without respect to strand." + echo "" + echo " -f, --min_overlap_A" + echo " type: double" + echo " example: 0.5" + echo " Minimum overlap required as a fraction of A." + echo " - Default is 1E-9 (i.e., 1bp)." + echo " - FLOAT (e.g. 0.50)" + echo "" + echo " -F, --min_overlap_B" + echo " type: double" + echo " example: 0.5" + echo " Minimum overlap required as a fraction of B." + echo " - Default is 1E-9 (i.e., 1bp)." + echo " - FLOAT (e.g. 0.50)" + echo "" + echo " -r, --reciprocal_overlap" + echo " type: boolean_true" + echo " Require that the fraction overlap be reciprocal for A AND B." + echo " - In other words, if -f is 0.90 and -r is used, this requires" + echo " that B overlap 90% of A and A _also_ overlaps 90% of B." + echo "" + echo " -e, --either_overlap" + echo " type: boolean_true" + echo " Require that the minimum fraction be satisfied for A OR B." + echo " - In other words, if -e is used with -f 0.90 and -F 0.10 this requires" + echo " that either 90% of A is covered OR 10% of B is covered." + echo " Without -e, both fractions would have to be satisfied." + echo "" + echo " --split" + echo " type: boolean_true" + echo " Treat \"split\" BAM or BED12 entries as distinct BED intervals." + echo "" + echo " -g, --genome" + echo " type: file, file must exist" + echo " example: genome.txt" + echo " Provide a genome file to enforce consistent chromosome" + echo " sort order across input files. Only applies when used" + echo " with -sorted option." + echo "" + echo " --nonamecheck" + echo " type: boolean_true" + echo " For sorted data, don't throw an error if the file" + echo " has different naming conventions for the same chromosome" + echo " (e.g., \"chr1\" vs \"chr01\")." + echo "" + echo " --sorted" + echo " type: boolean_true" + echo " Use the \"chromsweep\" algorithm for sorted (-k1,1 -k2,2n) input." + echo "" + echo " --names" + echo " type: string" + echo " When using multiple databases, provide an alias" + echo " for each that will appear instead of a fileId when" + echo " also printing the DB record." + echo "" + echo " --filenames" + echo " type: boolean_true" + echo " When using multiple databases, show each complete filename instead of a" + echo " fileId when also printing the DB record." + echo "" + echo " --sortout" + echo " type: boolean_true" + echo " When using multiple databases, sort the output DB hits for each record." + echo "" + echo " --bed" + echo " type: boolean_true" + echo " If using BAM input, write output as BED." + echo "" + echo " --header" + echo " type: boolean_true" + echo " Print the header from the A file prior to results." + echo "" + echo " --nobuf, --no_buffer_output" + echo " type: boolean_true" + echo " Disable buffered output. Using this option will cause each line" + echo " of output to be printed as it is generated, rather than saved" + echo " in a buffer. This will make printing large output files" + echo " noticeably slower, but can be useful in conjunction with" + echo " other software tools and scripts that need to process one" + echo " line of bedtools output at a time." + echo "" + echo " --iobuf, --io_buffer_size" + echo " type: integer" + echo " Specify amount of memory to use for input buffer." + echo " Takes an integer argument. Optional suffixes K/M/G supported." + echo " Note: currently has no effect with compressed files." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bedtools/bedtools_links/.config.vsh.yaml b/target/executable/bedtools/bedtools_links/.config.vsh.yaml index bc6afa65..7037157a 100644 --- a/target/executable/bedtools/bedtools_links/.config.vsh.yaml +++ b/target/executable/bedtools/bedtools_links/.config.vsh.yaml @@ -100,6 +100,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -209,16 +212,16 @@ build_info: engine: "docker|native" output: "target/executable/bedtools/bedtools_links" executable: "target/executable/bedtools/bedtools_links/bedtools_links" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bedtools/bedtools_links/bedtools_links b/target/executable/bedtools/bedtools_links/bedtools_links index 22e16dda..613d51be 100755 --- a/target/executable/bedtools/bedtools_links/bedtools_links +++ b/target/executable/bedtools/bedtools_links/bedtools_links @@ -2,7 +2,7 @@ # bedtools_links main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,50 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bedtools_links main" - echo "" - echo "Creates an HTML file with links to an instance of the UCSC Genome Browser for" - echo "all features / intervals in a file." - echo "This is useful for cases when one wants to manually inspect through a large set" - echo "of annotations or features." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input file (bed/gff/vcf)." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " Output HTML file to be written." - echo "" - echo "Options:" - echo " By default, the links created will point to human (hg18) UCSC browser." - echo " If you have a local mirror, you can override this behavior by supplying" - echo " the -base, -org, and -db options." - echo " For example, if the URL of your local mirror for mouse MM9 is called:" - echo " http://mymirror.myuniversity.edu, then you would use the following:" - echo " --base_url http://mymirror.myuniversity.edu" - echo " --organism mouse" - echo " --database mm9" - echo "" - echo " -base, --base_url" - echo " type: string" - echo " default: http://genome.ucsc.edu" - echo " The “basename” for the UCSC browser." - echo "" - echo " -org, --organism" - echo " type: string" - echo " default: human" - echo " The organism (e.g. mouse, human)." - echo "" - echo " -db, --database" - echo " type: string" - echo " default: hg18" - echo " The genome build." -} # initialise variables VIASH_MODE='run' @@ -500,9 +456,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_links" -LABEL org.opencontainers.image.created="2024-12-03T10:34:04Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:43Z" LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -617,6 +573,76 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedtools_links main" + echo "" + echo "Creates an HTML file with links to an instance of the UCSC Genome Browser for" + echo "all features / intervals in a file." + echo "This is useful for cases when one wants to manually inspect through a large set" + echo "of annotations or features." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input file (bed/gff/vcf)." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " Output HTML file to be written." + echo "" + echo "Options:" + echo " By default, the links created will point to human (hg18) UCSC browser." + echo " If you have a local mirror, you can override this behavior by supplying" + echo " the -base, -org, and -db options." + echo " For example, if the URL of your local mirror for mouse MM9 is called:" + echo " http://mymirror.myuniversity.edu, then you would use the following:" + echo " --base_url http://mymirror.myuniversity.edu" + echo " --organism mouse" + echo " --database mm9" + echo "" + echo " -base, --base_url" + echo " type: string" + echo " default: http://genome.ucsc.edu" + echo " The “basename” for the UCSC browser." + echo "" + echo " -org, --organism" + echo " type: string" + echo " default: human" + echo " The organism (e.g. mouse, human)." + echo "" + echo " -db, --database" + echo " type: string" + echo " default: hg18" + echo " The genome build." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bedtools/bedtools_merge/.config.vsh.yaml b/target/executable/bedtools/bedtools_merge/.config.vsh.yaml index 5a26354b..d574fd8a 100644 --- a/target/executable/bedtools/bedtools_merge/.config.vsh.yaml +++ b/target/executable/bedtools/bedtools_merge/.config.vsh.yaml @@ -174,6 +174,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -278,16 +281,16 @@ build_info: engine: "docker|native" output: "target/executable/bedtools/bedtools_merge" executable: "target/executable/bedtools/bedtools_merge/bedtools_merge" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bedtools/bedtools_merge/bedtools_merge b/target/executable/bedtools/bedtools_merge/bedtools_merge index f54e4ece..1aafa88a 100755 --- a/target/executable/bedtools/bedtools_merge/bedtools_merge +++ b/target/executable/bedtools/bedtools_merge/bedtools_merge @@ -2,7 +2,7 @@ # bedtools_merge main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,108 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bedtools_merge main" - echo "" - echo "Merges overlapping BED/GFF/VCF entries into a single interval." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input file (BED/GFF/VCF) to be merged." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " Output merged file BED to be written." - echo "" - echo "Options:" - echo " -s, --strand" - echo " type: boolean_true" - echo " Force strandedness. That is, only merge features" - echo " that are on the same strand." - echo " - By default, merging is done without respect to strand." - echo "" - echo " -S, --specific_strand" - echo " type: string" - echo " choices: [ +, - ]" - echo " Force merge for one specific strand only." - echo " Follow with + or - to force merge from only" - echo " the forward or reverse strand, respectively." - echo " - By default, merging is done without respect to strand." - echo "" - echo " -d, --distance" - echo " type: integer" - echo " Maximum distance between features allowed for features" - echo " to be merged." - echo " - Def. 0. That is, overlapping & book-ended features are merged." - echo " - (INTEGER)" - echo " - Note: negative values enforce the number of b.p. required for overlap." - echo "" - echo " -c, --columns" - echo " type: integer" - echo " Specify columns from the B file to map onto intervals in A." - echo " Default: 5." - echo " Multiple columns can be specified in a comma-delimited list." - echo "" - echo " -o, --operation" - echo " type: string" - echo " Specify the operation that should be applied to -c." - echo " Valid operations:" - echo " sum, min, max, absmin, absmax," - echo " mean, median, mode, antimode" - echo " stdev, sstdev" - echo " collapse (i.e., print a delimited list (duplicates allowed))," - echo " distinct (i.e., print a delimited list (NO duplicates allowed))," - echo " distinct_sort_num (as distinct, sorted numerically, ascending)," - echo " distinct_sort_num_desc (as distinct, sorted numerically," - echo " desscending)," - echo " distinct_only (delimited list of only unique values)," - echo " count" - echo " count_distinct (i.e., a count of the unique values in the column)," - echo " first (i.e., just the first value in the column)," - echo " last (i.e., just the last value in the column)," - echo " Default: sum" - echo " Multiple operations can be specified in a comma-delimited list." - echo " If there is only column, but multiple operations, all operations will be" - echo " applied on that column. Likewise, if there is only one operation, but" - echo " multiple columns, that operation will be applied to all columns." - echo " Otherwise, the number of columns must match the the number of" - echo " operations," - echo " and will be applied in respective order." - echo " E.g., \"-c 5,4,6 -o sum,mean,count\" will give the sum of column 5," - echo " the mean of column 4, and the count of column 6." - echo " The order of output columns will match the ordering given in the" - echo " command." - echo "" - echo " -delim, --delimiter" - echo " type: string" - echo " default: ," - echo " example: |" - echo " Specify a custom delimiter for the collapse operations." - echo "" - echo " -prec, --precision" - echo " type: integer" - echo " Sets the decimal precision for output (Default: 5)." - echo "" - echo " --bed" - echo " type: boolean_true" - echo " If using BAM input, write output as BED." - echo "" - echo " --header" - echo " type: boolean_true" - echo " Print the header from the A file prior to results." - echo "" - echo " -nobuf, --no_buffer" - echo " type: boolean_true" - echo " Disable buffered output. Using this option will cause each line" - echo " of output to be printed as it is generated, rather than saved" - echo " in a buffer. This will make printing large output files" - echo " noticeably slower, but can be useful in conjunction with" - echo " other software tools and scripts that need to process one" - echo " line of bedtools output at a time." -} # initialise variables VIASH_MODE='run' @@ -558,9 +456,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_merge" -LABEL org.opencontainers.image.created="2024-12-03T10:34:04Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:42Z" LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -675,6 +573,134 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedtools_merge main" + echo "" + echo "Merges overlapping BED/GFF/VCF entries into a single interval." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input file (BED/GFF/VCF) to be merged." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " Output merged file BED to be written." + echo "" + echo "Options:" + echo " -s, --strand" + echo " type: boolean_true" + echo " Force strandedness. That is, only merge features" + echo " that are on the same strand." + echo " - By default, merging is done without respect to strand." + echo "" + echo " -S, --specific_strand" + echo " type: string" + echo " choices: [ +, - ]" + echo " Force merge for one specific strand only." + echo " Follow with + or - to force merge from only" + echo " the forward or reverse strand, respectively." + echo " - By default, merging is done without respect to strand." + echo "" + echo " -d, --distance" + echo " type: integer" + echo " Maximum distance between features allowed for features" + echo " to be merged." + echo " - Def. 0. That is, overlapping & book-ended features are merged." + echo " - (INTEGER)" + echo " - Note: negative values enforce the number of b.p. required for overlap." + echo "" + echo " -c, --columns" + echo " type: integer" + echo " Specify columns from the B file to map onto intervals in A." + echo " Default: 5." + echo " Multiple columns can be specified in a comma-delimited list." + echo "" + echo " -o, --operation" + echo " type: string" + echo " Specify the operation that should be applied to -c." + echo " Valid operations:" + echo " sum, min, max, absmin, absmax," + echo " mean, median, mode, antimode" + echo " stdev, sstdev" + echo " collapse (i.e., print a delimited list (duplicates allowed))," + echo " distinct (i.e., print a delimited list (NO duplicates allowed))," + echo " distinct_sort_num (as distinct, sorted numerically, ascending)," + echo " distinct_sort_num_desc (as distinct, sorted numerically," + echo " desscending)," + echo " distinct_only (delimited list of only unique values)," + echo " count" + echo " count_distinct (i.e., a count of the unique values in the column)," + echo " first (i.e., just the first value in the column)," + echo " last (i.e., just the last value in the column)," + echo " Default: sum" + echo " Multiple operations can be specified in a comma-delimited list." + echo " If there is only column, but multiple operations, all operations will be" + echo " applied on that column. Likewise, if there is only one operation, but" + echo " multiple columns, that operation will be applied to all columns." + echo " Otherwise, the number of columns must match the the number of" + echo " operations," + echo " and will be applied in respective order." + echo " E.g., \"-c 5,4,6 -o sum,mean,count\" will give the sum of column 5," + echo " the mean of column 4, and the count of column 6." + echo " The order of output columns will match the ordering given in the" + echo " command." + echo "" + echo " -delim, --delimiter" + echo " type: string" + echo " default: ," + echo " example: |" + echo " Specify a custom delimiter for the collapse operations." + echo "" + echo " -prec, --precision" + echo " type: integer" + echo " Sets the decimal precision for output (Default: 5)." + echo "" + echo " --bed" + echo " type: boolean_true" + echo " If using BAM input, write output as BED." + echo "" + echo " --header" + echo " type: boolean_true" + echo " Print the header from the A file prior to results." + echo "" + echo " -nobuf, --no_buffer" + echo " type: boolean_true" + echo " Disable buffered output. Using this option will cause each line" + echo " of output to be printed as it is generated, rather than saved" + echo " in a buffer. This will make printing large output files" + echo " noticeably slower, but can be useful in conjunction with" + echo " other software tools and scripts that need to process one" + echo " line of bedtools output at a time." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/bedtools/bedtools_sort/.config.vsh.yaml b/target/executable/bedtools/bedtools_sort/.config.vsh.yaml index 6554e8fe..a341b1dc 100644 --- a/target/executable/bedtools/bedtools_sort/.config.vsh.yaml +++ b/target/executable/bedtools/bedtools_sort/.config.vsh.yaml @@ -114,6 +114,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -221,16 +224,16 @@ build_info: engine: "docker|native" output: "target/executable/bedtools/bedtools_sort" executable: "target/executable/bedtools/bedtools_sort/bedtools_sort" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bedtools/bedtools_sort/bedtools_sort b/target/executable/bedtools/bedtools_sort/bedtools_sort index 941312b3..0d67a470 100755 --- a/target/executable/bedtools/bedtools_sort/bedtools_sort +++ b/target/executable/bedtools/bedtools_sort/bedtools_sort @@ -2,7 +2,7 @@ # bedtools_sort main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,59 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bedtools_sort main" - echo "" - echo "Sorts a feature file (bed/gff/vcf) by chromosome and other criteria." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input file (bed/gff/vcf) to be sorted." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " Output sorted file (bed/gff/vcf) to be written." - echo "" - echo "Options:" - echo " --sizeA" - echo " type: boolean_true" - echo " Sort by feature size in ascending order." - echo "" - echo " --sizeD" - echo " type: boolean_true" - echo " Sort by feature size in descending order." - echo "" - echo " --chrThenSizeA" - echo " type: boolean_true" - echo " Sort by chrom (asc), then feature size (asc)." - echo "" - echo " --chrThenSizeD" - echo " type: boolean_true" - echo " Sort by chrom (asc), then feature size (desc)." - echo "" - echo " --chrThenScoreA" - echo " type: boolean_true" - echo " Sort by chrom (asc), then score (asc)." - echo "" - echo " --chrThenScoreD" - echo " type: boolean_true" - echo " Sort by chrom (asc), then score (desc)." - echo "" - echo " -g, --genome" - echo " type: file, file must exist" - echo " Sort according to the chromosomes declared in \"genome.txt\"" - echo "" - echo " --faidx" - echo " type: file, file must exist" - echo " Sort according to the chromosomes declared in \"names.txt\"" - echo "" - echo " --header" - echo " type: boolean_true" - echo " Print the header from the A file prior to results." -} # initialise variables VIASH_MODE='run' @@ -509,9 +456,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_sort" -LABEL org.opencontainers.image.created="2024-12-03T10:34:03Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:43Z" LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -626,6 +573,85 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedtools_sort main" + echo "" + echo "Sorts a feature file (bed/gff/vcf) by chromosome and other criteria." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input file (bed/gff/vcf) to be sorted." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " Output sorted file (bed/gff/vcf) to be written." + echo "" + echo "Options:" + echo " --sizeA" + echo " type: boolean_true" + echo " Sort by feature size in ascending order." + echo "" + echo " --sizeD" + echo " type: boolean_true" + echo " Sort by feature size in descending order." + echo "" + echo " --chrThenSizeA" + echo " type: boolean_true" + echo " Sort by chrom (asc), then feature size (asc)." + echo "" + echo " --chrThenSizeD" + echo " type: boolean_true" + echo " Sort by chrom (asc), then feature size (desc)." + echo "" + echo " --chrThenScoreA" + echo " type: boolean_true" + echo " Sort by chrom (asc), then score (asc)." + echo "" + echo " --chrThenScoreD" + echo " type: boolean_true" + echo " Sort by chrom (asc), then score (desc)." + echo "" + echo " -g, --genome" + echo " type: file, file must exist" + echo " Sort according to the chromosomes declared in \"genome.txt\"" + echo "" + echo " --faidx" + echo " type: file, file must exist" + echo " Sort according to the chromosomes declared in \"names.txt\"" + echo "" + echo " --header" + echo " type: boolean_true" + echo " Print the header from the A file prior to results." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/busco/busco_download_datasets/.config.vsh.yaml b/target/executable/busco/busco_download_datasets/.config.vsh.yaml index b4a40135..12b8f47c 100644 --- a/target/executable/busco/busco_download_datasets/.config.vsh.yaml +++ b/target/executable/busco/busco_download_datasets/.config.vsh.yaml @@ -58,6 +58,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -157,16 +160,16 @@ build_info: engine: "docker|native" output: "target/executable/busco/busco_download_datasets" executable: "target/executable/busco/busco_download_datasets/busco_download_datasets" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/busco/busco_download_datasets/busco_download_datasets b/target/executable/busco/busco_download_datasets/busco_download_datasets index 939f9f43..d8dc2fb7 100755 --- a/target/executable/busco/busco_download_datasets/busco_download_datasets +++ b/target/executable/busco/busco_download_datasets/busco_download_datasets @@ -2,7 +2,7 @@ # busco_download_datasets main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,29 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "busco_download_datasets main" - echo "" - echo "Downloads available busco datasets" - echo "" - echo "Inputs:" - echo " --download" - echo " type: string, required parameter" - echo " example: stramenopiles_odb10" - echo " Download dataset. Possible values are a specific dataset name, \"all\"," - echo " \"prokaryota\", \"eukaryota\", or \"virus\"." - echo " The full list of available datasets can be viewed" - echo " [here](https://busco-data.ezlab.org/v5/data/lineages/) or by running the" - echo " busco/busco_list_datasets component." - echo "" - echo "Outputs:" - echo " --download_path" - echo " type: file, output, file must exist" - echo " default: busco_downloads" - echo " example: busco_downloads" - echo " Local filepath for storing BUSCO dataset downloads" -} # initialise variables VIASH_MODE='run' @@ -475,9 +452,9 @@ RUN busco --version | sed 's/BUSCO\s\(.*\)/busco: "\1"/' > /var/software_version LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component busco busco_download_datasets" -LABEL org.opencontainers.image.created="2024-12-03T10:34:01Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:40Z" LABEL org.opencontainers.image.source="https://gitlab.com/ezlab/busco" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -592,6 +569,55 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "busco_download_datasets main" + echo "" + echo "Downloads available busco datasets" + echo "" + echo "Inputs:" + echo " --download" + echo " type: string, required parameter" + echo " example: stramenopiles_odb10" + echo " Download dataset. Possible values are a specific dataset name, \"all\"," + echo " \"prokaryota\", \"eukaryota\", or \"virus\"." + echo " The full list of available datasets can be viewed" + echo " [here](https://busco-data.ezlab.org/v5/data/lineages/) or by running the" + echo " busco/busco_list_datasets component." + echo "" + echo "Outputs:" + echo " --download_path" + echo " type: file, output, file must exist" + echo " default: busco_downloads" + echo " example: busco_downloads" + echo " Local filepath for storing BUSCO dataset downloads" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/busco/busco_list_datasets/.config.vsh.yaml b/target/executable/busco/busco_list_datasets/.config.vsh.yaml index 49b5b868..41b1f542 100644 --- a/target/executable/busco/busco_list_datasets/.config.vsh.yaml +++ b/target/executable/busco/busco_list_datasets/.config.vsh.yaml @@ -45,6 +45,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -144,16 +147,16 @@ build_info: engine: "docker|native" output: "target/executable/busco/busco_list_datasets" executable: "target/executable/busco/busco_list_datasets/busco_list_datasets" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/busco/busco_list_datasets/busco_list_datasets b/target/executable/busco/busco_list_datasets/busco_list_datasets index 87b4e113..dd7f56de 100755 --- a/target/executable/busco/busco_list_datasets/busco_list_datasets +++ b/target/executable/busco/busco_list_datasets/busco_list_datasets @@ -2,7 +2,7 @@ # busco_list_datasets main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,19 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "busco_list_datasets main" - echo "" - echo "Lists the available busco datasets" - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " default: busco_dataset_list.txt" - echo " example: file.txt" - echo " Output file of the available busco datasets" -} # initialise variables VIASH_MODE='run' @@ -465,9 +452,9 @@ RUN busco --version | sed 's/BUSCO\s\(.*\)/busco: "\1"/' > /var/software_version LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component busco busco_list_datasets" -LABEL org.opencontainers.image.created="2024-12-03T10:34:00Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:40Z" LABEL org.opencontainers.image.source="https://gitlab.com/ezlab/busco" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -582,6 +569,45 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "busco_list_datasets main" + echo "" + echo "Lists the available busco datasets" + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " default: busco_dataset_list.txt" + echo " example: file.txt" + echo " Output file of the available busco datasets" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/busco/busco_run/.config.vsh.yaml b/target/executable/busco/busco_run/.config.vsh.yaml index 26ea3c53..facb6b7c 100644 --- a/target/executable/busco/busco_run/.config.vsh.yaml +++ b/target/executable/busco/busco_run/.config.vsh.yaml @@ -322,6 +322,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -422,16 +425,16 @@ build_info: engine: "docker|native" output: "target/executable/busco/busco_run" executable: "target/executable/busco/busco_run/busco_run" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/busco/busco_run/busco_run b/target/executable/busco/busco_run/busco_run index e1f473e9..b0ce61c5 100755 --- a/target/executable/busco/busco_run/busco_run +++ b/target/executable/busco/busco_run/busco_run @@ -2,7 +2,7 @@ # busco_run main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,186 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "busco_run main" - echo "" - echo "Assessment of genome assembly and annotation completeness with single copy" - echo "orthologs" - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: file.fasta" - echo " Input sequence file in FASTA format. Can be an assembled genome or" - echo " transcriptome (DNA), or protein sequences from an annotated gene set." - echo " Also possible to use a path to a directory containing multiple input" - echo " files." - echo "" - echo " -m, --mode" - echo " type: string, required parameter" - echo " example: proteins" - echo " choices: [ genome, geno, transcriptome, tran, proteins, prot ]" - echo " Specify which BUSCO analysis mode to run. There are three valid modes:" - echo " - geno or genome, for genome assemblies (DNA)" - echo " - tran or transcriptome, for transcriptome assemblies (DNA)" - echo " - prot or proteins, for annotated gene sets (protein)" - echo "" - echo " -l, --lineage_dataset" - echo " type: string" - echo " example: stramenopiles_odb10" - echo " Specify a BUSCO lineage dataset that is most closely related to the" - echo " assembly or gene set being assessed." - echo " The full list of available datasets can be viewed" - echo " [here](https://busco-data.ezlab.org/v5/data/lineages/) or by running the" - echo " busco/busco_list_datasets component." - echo " When unsure, the \"--auto_lineage\" flag can be set to automatically find" - echo " the optimal lineage path." - echo " BUSCO will automatically download the requested dataset if it is not" - echo " already present in the download folder." - echo " You can optionally provide a path to a local dataset instead of a name," - echo " e.g. path/to/dataset." - echo " Datasets can be downloaded using the busco/busco_download_dataset" - echo " component." - echo "" - echo "Outputs:" - echo " --short_summary_json" - echo " type: file, output, file must exist" - echo " example: short_summary.json" - echo " Output file for short summary in JSON format." - echo "" - echo " --short_summary_txt" - echo " type: file, output, file must exist" - echo " example: short_summary.txt" - echo " Output file for short summary in TXT format." - echo "" - echo " --full_table" - echo " type: file, output, file must exist" - echo " example: full_table.tsv" - echo " Full table output in TSV format." - echo "" - echo " --missing_busco_list" - echo " type: file, output, file must exist" - echo " example: missing_busco_list.tsv" - echo " Missing list output in TSV format." - echo "" - echo " --output_dir" - echo " type: file, output, file must exist" - echo " example: output_dir" - echo " The full output directory, if so desired." - echo "" - echo "Resource and Run Settings:" - echo " --force" - echo " type: boolean_true" - echo " Force rewriting of existing files. Must be used when output files with" - echo " the provided name already exist." - echo "" - echo " -q, --quiet" - echo " type: boolean_true" - echo " Disable the info logs, displays only errors." - echo "" - echo " -r, --restart" - echo " type: boolean_true" - echo " Continue a run that had already partially completed. Restarting skips" - echo " calls to tools that have completed but performs all pre- and" - echo " post-processing steps." - echo "" - echo " --tar" - echo " type: boolean_true" - echo " Compress some subdirectories with many files to save space." - echo "" - echo "Lineage Dataset Settings:" - echo " --auto_lineage" - echo " type: boolean_true" - echo " Run auto-lineage pipelilne to automatically determine BUSCO lineage" - echo " dataset that is most closely related to the assembly or gene set being" - echo " assessed." - echo "" - echo " --auto_lineage_euk" - echo " type: boolean_true" - echo " Run auto-placement just on eukaryota tree to find optimal lineage path." - echo "" - echo " --auto_lineage_prok" - echo " type: boolean_true" - echo " Run auto_lineage just on prokaryota trees to find optimum lineage path." - echo "" - echo " --datasets_version" - echo " type: string" - echo " example: odb10" - echo " Specify the version of BUSCO datasets" - echo "" - echo "Augustus Settings:" - echo " --augustus" - echo " type: boolean_true" - echo " Use augustus gene predictor for eukaryote runs." - echo "" - echo " --augustus_parameters" - echo " type: string" - echo " example: --PARAM1=VALUE1,--PARAM2=VALUE2" - echo " Additional parameters to be passed to Augustus (see Augustus" - echo " documentation:" - echo " " - echo "https://github.com/Gaius-Augustus/Augustus/blob/master/docs/RUNNING-AUGUSTUS.md)." - echo " Parameters should be contained within a single string, without" - echo " whitespace and seperated by commas." - echo "" - echo " --augustus_species" - echo " type: string" - echo " Specify the augustus species" - echo "" - echo " --long" - echo " type: boolean_true" - echo " Optimize Augustus self-training mode. This adds considerably to the run" - echo " time, but can improve results for some non-model organisms." - echo "" - echo "BBTools Settings:" - echo " --contig_break" - echo " type: integer" - echo " Number of contiguous Ns to signify a break between contigs in BBTools" - echo " analysis." - echo "" - echo " --limit" - echo " type: integer" - echo " Number of candidate regions (contig or transcript) from the BLAST output" - echo " to consider per BUSCO." - echo " This option is only effective in pipelines using BLAST, i.e. the genome" - echo " pipeline (see --augustus) or the prokaryota transcriptome pipeline." - echo "" - echo " --scaffold_composition" - echo " type: boolean_true" - echo " Writes ACGTN content per scaffold to a file scaffold_composition.txt." - echo "" - echo "BLAST Settings:" - echo " --e_value" - echo " type: double" - echo " E-value cutoff for BLAST searches." - echo "" - echo "Protein Gene Prediction settings:" - echo " --miniprot" - echo " type: boolean_true" - echo " Use Miniprot gene predictor." - echo "" - echo "MetaEuk Settings:" - echo " --metaeuk" - echo " type: boolean_true" - echo " Use Metaeuk gene predictor." - echo "" - echo " --metaeuk_parameters" - echo " type: string" - echo " example: --max-overlap=15,--min-exon-aa=15" - echo " Pass additional arguments to Metaeuk for the first run (see Metaeuk" - echo " documentation https://github.com/soedinglab/metaeuk)." - echo " All parameters should be contained within a single string with no white" - echo " space, with each parameter separated by a comma." - echo "" - echo " --metaeuk_rerun_parameters" - echo " type: string" - echo " example: --max-overlap=15,--min-exon-aa=15" - echo " Pass additional arguments to Metaeuk for the second run (see Metaeuk" - echo " documentation https://github.com/soedinglab/metaeuk)." - echo " All parameters should be contained within a single string with no white" - echo " space, with each parameter separated by a comma." -} # initialise variables VIASH_MODE='run' @@ -632,9 +452,9 @@ RUN busco --version | sed 's/BUSCO\s\(.*\)/busco: "\1"/' > /var/software_version LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component busco busco_run" -LABEL org.opencontainers.image.created="2024-12-03T10:34:00Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:40Z" LABEL org.opencontainers.image.source="https://gitlab.com/ezlab/busco" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -749,6 +569,212 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "busco_run main" + echo "" + echo "Assessment of genome assembly and annotation completeness with single copy" + echo "orthologs" + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: file.fasta" + echo " Input sequence file in FASTA format. Can be an assembled genome or" + echo " transcriptome (DNA), or protein sequences from an annotated gene set." + echo " Also possible to use a path to a directory containing multiple input" + echo " files." + echo "" + echo " -m, --mode" + echo " type: string, required parameter" + echo " example: proteins" + echo " choices: [ genome, geno, transcriptome, tran, proteins, prot ]" + echo " Specify which BUSCO analysis mode to run. There are three valid modes:" + echo " - geno or genome, for genome assemblies (DNA)" + echo " - tran or transcriptome, for transcriptome assemblies (DNA)" + echo " - prot or proteins, for annotated gene sets (protein)" + echo "" + echo " -l, --lineage_dataset" + echo " type: string" + echo " example: stramenopiles_odb10" + echo " Specify a BUSCO lineage dataset that is most closely related to the" + echo " assembly or gene set being assessed." + echo " The full list of available datasets can be viewed" + echo " [here](https://busco-data.ezlab.org/v5/data/lineages/) or by running the" + echo " busco/busco_list_datasets component." + echo " When unsure, the \"--auto_lineage\" flag can be set to automatically find" + echo " the optimal lineage path." + echo " BUSCO will automatically download the requested dataset if it is not" + echo " already present in the download folder." + echo " You can optionally provide a path to a local dataset instead of a name," + echo " e.g. path/to/dataset." + echo " Datasets can be downloaded using the busco/busco_download_dataset" + echo " component." + echo "" + echo "Outputs:" + echo " --short_summary_json" + echo " type: file, output, file must exist" + echo " example: short_summary.json" + echo " Output file for short summary in JSON format." + echo "" + echo " --short_summary_txt" + echo " type: file, output, file must exist" + echo " example: short_summary.txt" + echo " Output file for short summary in TXT format." + echo "" + echo " --full_table" + echo " type: file, output, file must exist" + echo " example: full_table.tsv" + echo " Full table output in TSV format." + echo "" + echo " --missing_busco_list" + echo " type: file, output, file must exist" + echo " example: missing_busco_list.tsv" + echo " Missing list output in TSV format." + echo "" + echo " --output_dir" + echo " type: file, output, file must exist" + echo " example: output_dir" + echo " The full output directory, if so desired." + echo "" + echo "Resource and Run Settings:" + echo " --force" + echo " type: boolean_true" + echo " Force rewriting of existing files. Must be used when output files with" + echo " the provided name already exist." + echo "" + echo " -q, --quiet" + echo " type: boolean_true" + echo " Disable the info logs, displays only errors." + echo "" + echo " -r, --restart" + echo " type: boolean_true" + echo " Continue a run that had already partially completed. Restarting skips" + echo " calls to tools that have completed but performs all pre- and" + echo " post-processing steps." + echo "" + echo " --tar" + echo " type: boolean_true" + echo " Compress some subdirectories with many files to save space." + echo "" + echo "Lineage Dataset Settings:" + echo " --auto_lineage" + echo " type: boolean_true" + echo " Run auto-lineage pipelilne to automatically determine BUSCO lineage" + echo " dataset that is most closely related to the assembly or gene set being" + echo " assessed." + echo "" + echo " --auto_lineage_euk" + echo " type: boolean_true" + echo " Run auto-placement just on eukaryota tree to find optimal lineage path." + echo "" + echo " --auto_lineage_prok" + echo " type: boolean_true" + echo " Run auto_lineage just on prokaryota trees to find optimum lineage path." + echo "" + echo " --datasets_version" + echo " type: string" + echo " example: odb10" + echo " Specify the version of BUSCO datasets" + echo "" + echo "Augustus Settings:" + echo " --augustus" + echo " type: boolean_true" + echo " Use augustus gene predictor for eukaryote runs." + echo "" + echo " --augustus_parameters" + echo " type: string" + echo " example: --PARAM1=VALUE1,--PARAM2=VALUE2" + echo " Additional parameters to be passed to Augustus (see Augustus" + echo " documentation:" + echo " " + echo "https://github.com/Gaius-Augustus/Augustus/blob/master/docs/RUNNING-AUGUSTUS.md)." + echo " Parameters should be contained within a single string, without" + echo " whitespace and seperated by commas." + echo "" + echo " --augustus_species" + echo " type: string" + echo " Specify the augustus species" + echo "" + echo " --long" + echo " type: boolean_true" + echo " Optimize Augustus self-training mode. This adds considerably to the run" + echo " time, but can improve results for some non-model organisms." + echo "" + echo "BBTools Settings:" + echo " --contig_break" + echo " type: integer" + echo " Number of contiguous Ns to signify a break between contigs in BBTools" + echo " analysis." + echo "" + echo " --limit" + echo " type: integer" + echo " Number of candidate regions (contig or transcript) from the BLAST output" + echo " to consider per BUSCO." + echo " This option is only effective in pipelines using BLAST, i.e. the genome" + echo " pipeline (see --augustus) or the prokaryota transcriptome pipeline." + echo "" + echo " --scaffold_composition" + echo " type: boolean_true" + echo " Writes ACGTN content per scaffold to a file scaffold_composition.txt." + echo "" + echo "BLAST Settings:" + echo " --e_value" + echo " type: double" + echo " E-value cutoff for BLAST searches." + echo "" + echo "Protein Gene Prediction settings:" + echo " --miniprot" + echo " type: boolean_true" + echo " Use Miniprot gene predictor." + echo "" + echo "MetaEuk Settings:" + echo " --metaeuk" + echo " type: boolean_true" + echo " Use Metaeuk gene predictor." + echo "" + echo " --metaeuk_parameters" + echo " type: string" + echo " example: --max-overlap=15,--min-exon-aa=15" + echo " Pass additional arguments to Metaeuk for the first run (see Metaeuk" + echo " documentation https://github.com/soedinglab/metaeuk)." + echo " All parameters should be contained within a single string with no white" + echo " space, with each parameter separated by a comma." + echo "" + echo " --metaeuk_rerun_parameters" + echo " type: string" + echo " example: --max-overlap=15,--min-exon-aa=15" + echo " Pass additional arguments to Metaeuk for the second run (see Metaeuk" + echo " documentation https://github.com/soedinglab/metaeuk)." + echo " All parameters should be contained within a single string with no white" + echo " space, with each parameter separated by a comma." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/cellranger/cellranger_mkref/.config.vsh.yaml b/target/executable/cellranger/cellranger_mkref/.config.vsh.yaml index f9c27765..2c03bf3b 100644 --- a/target/executable/cellranger/cellranger_mkref/.config.vsh.yaml +++ b/target/executable/cellranger/cellranger_mkref/.config.vsh.yaml @@ -75,6 +75,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -194,16 +197,16 @@ build_info: engine: "docker|native" output: "target/executable/cellranger/cellranger_mkref" executable: "target/executable/cellranger/cellranger_mkref/cellranger_mkref" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/cellranger/cellranger_mkref/cellranger_mkref b/target/executable/cellranger/cellranger_mkref/cellranger_mkref index dc3f7ab4..18e76d38 100755 --- a/target/executable/cellranger/cellranger_mkref/cellranger_mkref +++ b/target/executable/cellranger/cellranger_mkref/cellranger_mkref @@ -2,7 +2,7 @@ # cellranger_mkref main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,33 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "cellranger_mkref main" - echo "" - echo "Build a Cell Ranger-compatible reference folder from user-supplied genome FASTA" - echo "and gene GTF files." - echo "" - echo "Arguments:" - echo " --genome_fasta" - echo " type: file, required parameter, file must exist" - echo " example: genome_sequence.fa.gz" - echo " Reference genome fasta." - echo "" - echo " --transcriptome_gtf" - echo " type: file, required parameter, file must exist" - echo " example: transcriptome_annotation.gtf.gz" - echo " Reference transcriptome annotation." - echo "" - echo " --reference_version" - echo " type: string" - echo " Optional reference version string to include with reference" - echo "" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: cellranger_reference" - echo " Output folder" -} # initialise variables VIASH_MODE='run' @@ -481,9 +454,9 @@ RUN apt-get update && \ LABEL org.opencontainers.image.authors="Emma Rousseau" LABEL org.opencontainers.image.description="Companion container for running component cellranger cellranger_mkref" -LABEL org.opencontainers.image.created="2024-12-03T10:34:01Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:45Z" LABEL org.opencontainers.image.source="https://github.com/10XGenomics/cellranger/blob/main/lib/python/cellranger/reference_builder.py" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -598,6 +571,59 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "cellranger_mkref main" + echo "" + echo "Build a Cell Ranger-compatible reference folder from user-supplied genome FASTA" + echo "and gene GTF files." + echo "" + echo "Arguments:" + echo " --genome_fasta" + echo " type: file, required parameter, file must exist" + echo " example: genome_sequence.fa.gz" + echo " Reference genome fasta." + echo "" + echo " --transcriptome_gtf" + echo " type: file, required parameter, file must exist" + echo " example: transcriptome_annotation.gtf.gz" + echo " Reference transcriptome annotation." + echo "" + echo " --reference_version" + echo " type: string" + echo " Optional reference version string to include with reference" + echo "" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: cellranger_reference" + echo " Output folder" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/cutadapt/.config.vsh.yaml b/target/executable/cutadapt/.config.vsh.yaml index 6eac2dab..af54c8e4 100644 --- a/target/executable/cutadapt/.config.vsh.yaml +++ b/target/executable/cutadapt/.config.vsh.yaml @@ -633,6 +633,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -739,16 +742,16 @@ build_info: engine: "docker|native" output: "target/executable/cutadapt" executable: "target/executable/cutadapt/cutadapt" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/cutadapt/cutadapt b/target/executable/cutadapt/cutadapt index 37d90bf0..934ad898 100755 --- a/target/executable/cutadapt/cutadapt +++ b/target/executable/cutadapt/cutadapt @@ -2,7 +2,7 @@ # cutadapt main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,6 +172,407 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM python:3.12 +ENTRYPOINT [] +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "cutadapt" + +RUN cutadapt --version | sed 's/\(.*\)/cutadapt: "\1"/' > /var/software_versions.txt + +LABEL org.opencontainers.image.authors="Toni Verbeiren" +LABEL org.opencontainers.image.description="Companion container for running component cutadapt" +LABEL org.opencontainers.image.created="2025-03-06T09:19:35Z" +LABEL org.opencontainers.image.source="https://github.com/marcelm/cutadapt" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "cutadapt main" @@ -547,407 +948,32 @@ function ViashHelp { echo " --debug" echo " type: boolean_true" echo " Print debug information" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? : whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM python:3.12 -ENTRYPOINT [] -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "cutadapt" - -RUN cutadapt --version | sed 's/\(.*\)/cutadapt: "\1"/' > /var/software_versions.txt - -LABEL org.opencontainers.image.authors="Toni Verbeiren" -LABEL org.opencontainers.image.description="Companion container for running component cutadapt" -LABEL org.opencontainers.image.created="2024-12-03T10:33:56Z" -LABEL org.opencontainers.image.source="https://github.com/marcelm/cutadapt" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/falco/.config.vsh.yaml b/target/executable/falco/.config.vsh.yaml index bf8cbe6b..8ef28831 100644 --- a/target/executable/falco/.config.vsh.yaml +++ b/target/executable/falco/.config.vsh.yaml @@ -203,6 +203,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -316,16 +319,16 @@ build_info: engine: "docker|native" output: "target/executable/falco" executable: "target/executable/falco/falco" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/falco/falco b/target/executable/falco/falco index c9a3dd6f..24635cb2 100755 --- a/target/executable/falco/falco +++ b/target/executable/falco/falco @@ -2,7 +2,7 @@ # falco main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,131 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "falco main" - echo "" - echo "A C++ drop-in replacement of FastQC to assess the quality of sequence read data" - echo "" - echo "Input arguments:" - echo " --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: input1.fastq;input2.fastq" - echo " input fastq files" - echo "" - echo "Run arguments:" - echo " --nogroup" - echo " type: boolean_true" - echo " Disable grouping of bases for reads >50bp." - echo " All reports will show data for every base in" - echo " the read. WARNING: When using this option," - echo " your plots may end up a ridiculous size. You" - echo " have been warned!" - echo "" - echo " --contaminents" - echo " type: file, file must exist" - echo " Specifies a non-default file which contains" - echo " the list of contaminants to screen" - echo " overrepresented sequences against. The file" - echo " must contain sets of named contaminants in" - echo " the form name[tab]sequence. Lines prefixed" - echo " with a hash will be ignored. Default:" - echo " " - echo "https://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/contaminant_list.txt" - echo "" - echo " --adapters" - echo " type: file, file must exist" - echo " Specifies a non-default file which contains" - echo " the list of adapter sequences which will be" - echo " explicity searched against the library. The" - echo " file must contain sets of named adapters in" - echo " the form name[tab]sequence. Lines prefixed" - echo " with a hash will be ignored. Default:" - echo " " - echo "https://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/adapter_list.txt" - echo "" - echo " --limits" - echo " type: file, file must exist" - echo " Specifies a non-default file which contains" - echo " a set of criteria which will be used to" - echo " determine the warn/error limits for the" - echo " various modules. This file can also be used" - echo " to selectively remove some modules from the" - echo " output all together. The format needs to" - echo " mirror the default limits.txt file found in" - echo " the Configuration folder. Default:" - echo " " - echo "https://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/limits.txt" - echo "" - echo " -s, --subsample" - echo " type: integer" - echo " example: 10" - echo " [Falco only] makes falco faster (but" - echo " possibly less accurate) by only processing" - echo " reads that are a multiple of this value (using" - echo " 0-based indexing to number reads)." - echo "" - echo " -b, --bisulfite" - echo " type: boolean_true" - echo " [Falco only] reads are whole genome" - echo " bisulfite sequencing, and more Ts and fewer" - echo " Cs are therefore expected and will be" - echo " accounted for in base content." - echo "" - echo " -r, --reverse_complement" - echo " type: boolean_true" - echo " [Falco only] The input is a" - echo " reverse-complement. All modules will be" - echo " tested by swapping A/T and C/G" - echo "" - echo "Output arguments:" - echo " -o, --outdir" - echo " type: file, required parameter, output, file must exist" - echo " example: output" - echo " Create all output files in the specified" - echo " output directory. FALCO-SPECIFIC: If the" - echo " directory does not exists, the program will" - echo " create it." - echo "" - echo " -f, --format" - echo " type: string" - echo " choices: [ bam, sam, bam_mapped, sam_mapped, fastq, fq, fastq.gz, fq.gz" - echo "]" - echo " Bypasses the normal sequence file format" - echo " detection and forces the program to use the" - echo " specified format. Validformats are bam, sam," - echo " bam_mapped, sam_mapped, fastq, fq, fastq.gz" - echo " or fq.gz." - echo "" - echo " -D, --data_filename" - echo " type: file, output, file must exist" - echo " [Falco only] Specify filename for FastQC" - echo " data output (TXT). If not specified, it will" - echo " be called fastq_data.txt in either the input" - echo " file's directory or the one specified in the" - echo " --output flag. Only available when running" - echo " falco with a single input." - echo "" - echo " -R, --report_filename" - echo " type: file, output, file must exist" - echo " [Falco only] Specify filename for FastQC" - echo " report output (HTML). If not specified, it" - echo " will be called fastq_report.html in either" - echo " the input file's directory or the one" - echo " specified in the --output flag. Only" - echo " available when running falco with a single" - echo " input." - echo "" - echo " -S, --summary_filename" - echo " type: file, output, file must exist" - echo " [Falco only] Specify filename for the short" - echo " summary output (TXT). If not specified, it" - echo " will be called fastq_report.html in either" - echo " the input file's directory or the one" - echo " specified in the --output flag. Only" - echo " available when running falco with a single" - echo " input." -} # initialise variables VIASH_MODE='run' @@ -589,9 +464,9 @@ RUN echo "falco: \"$(falco -v | sed -n 's/^falco //p')\"" > /var/software_versio LABEL org.opencontainers.image.authors="Toni Verbeiren" LABEL org.opencontainers.image.description="Companion container for running component falco" -LABEL org.opencontainers.image.created="2024-12-03T10:33:53Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:42Z" LABEL org.opencontainers.image.source="https://github.com/smithlabcode/falco" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -706,6 +581,157 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "falco main" + echo "" + echo "A C++ drop-in replacement of FastQC to assess the quality of sequence read data" + echo "" + echo "Input arguments:" + echo " --input" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example: input1.fastq;input2.fastq" + echo " input fastq files" + echo "" + echo "Run arguments:" + echo " --nogroup" + echo " type: boolean_true" + echo " Disable grouping of bases for reads >50bp." + echo " All reports will show data for every base in" + echo " the read. WARNING: When using this option," + echo " your plots may end up a ridiculous size. You" + echo " have been warned!" + echo "" + echo " --contaminents" + echo " type: file, file must exist" + echo " Specifies a non-default file which contains" + echo " the list of contaminants to screen" + echo " overrepresented sequences against. The file" + echo " must contain sets of named contaminants in" + echo " the form name[tab]sequence. Lines prefixed" + echo " with a hash will be ignored. Default:" + echo " " + echo "https://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/contaminant_list.txt" + echo "" + echo " --adapters" + echo " type: file, file must exist" + echo " Specifies a non-default file which contains" + echo " the list of adapter sequences which will be" + echo " explicity searched against the library. The" + echo " file must contain sets of named adapters in" + echo " the form name[tab]sequence. Lines prefixed" + echo " with a hash will be ignored. Default:" + echo " " + echo "https://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/adapter_list.txt" + echo "" + echo " --limits" + echo " type: file, file must exist" + echo " Specifies a non-default file which contains" + echo " a set of criteria which will be used to" + echo " determine the warn/error limits for the" + echo " various modules. This file can also be used" + echo " to selectively remove some modules from the" + echo " output all together. The format needs to" + echo " mirror the default limits.txt file found in" + echo " the Configuration folder. Default:" + echo " " + echo "https://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/limits.txt" + echo "" + echo " -s, --subsample" + echo " type: integer" + echo " example: 10" + echo " [Falco only] makes falco faster (but" + echo " possibly less accurate) by only processing" + echo " reads that are a multiple of this value (using" + echo " 0-based indexing to number reads)." + echo "" + echo " -b, --bisulfite" + echo " type: boolean_true" + echo " [Falco only] reads are whole genome" + echo " bisulfite sequencing, and more Ts and fewer" + echo " Cs are therefore expected and will be" + echo " accounted for in base content." + echo "" + echo " -r, --reverse_complement" + echo " type: boolean_true" + echo " [Falco only] The input is a" + echo " reverse-complement. All modules will be" + echo " tested by swapping A/T and C/G" + echo "" + echo "Output arguments:" + echo " -o, --outdir" + echo " type: file, required parameter, output, file must exist" + echo " example: output" + echo " Create all output files in the specified" + echo " output directory. FALCO-SPECIFIC: If the" + echo " directory does not exists, the program will" + echo " create it." + echo "" + echo " -f, --format" + echo " type: string" + echo " choices: [ bam, sam, bam_mapped, sam_mapped, fastq, fq, fastq.gz, fq.gz" + echo "]" + echo " Bypasses the normal sequence file format" + echo " detection and forces the program to use the" + echo " specified format. Validformats are bam, sam," + echo " bam_mapped, sam_mapped, fastq, fq, fastq.gz" + echo " or fq.gz." + echo "" + echo " -D, --data_filename" + echo " type: file, output, file must exist" + echo " [Falco only] Specify filename for FastQC" + echo " data output (TXT). If not specified, it will" + echo " be called fastq_data.txt in either the input" + echo " file's directory or the one specified in the" + echo " --output flag. Only available when running" + echo " falco with a single input." + echo "" + echo " -R, --report_filename" + echo " type: file, output, file must exist" + echo " [Falco only] Specify filename for FastQC" + echo " report output (HTML). If not specified, it" + echo " will be called fastq_report.html in either" + echo " the input file's directory or the one" + echo " specified in the --output flag. Only" + echo " available when running falco with a single" + echo " input." + echo "" + echo " -S, --summary_filename" + echo " type: file, output, file must exist" + echo " [Falco only] Specify filename for the short" + echo " summary output (TXT). If not specified, it" + echo " will be called fastq_report.html in either" + echo " the input file's directory or the one" + echo " specified in the --output flag. Only" + echo " available when running falco with a single" + echo " input." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/fastp/.config.vsh.yaml b/target/executable/fastp/.config.vsh.yaml index 2ed2f08c..23ee81e8 100644 --- a/target/executable/fastp/.config.vsh.yaml +++ b/target/executable/fastp/.config.vsh.yaml @@ -982,6 +982,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -1082,16 +1085,16 @@ build_info: engine: "docker|native" output: "target/executable/fastp" executable: "target/executable/fastp/fastp" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/fastp/fastp b/target/executable/fastp/fastp index aec9e4e9..cc8d95de 100755 --- a/target/executable/fastp/fastp +++ b/target/executable/fastp/fastp @@ -2,7 +2,7 @@ # fastp main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,6 +172,404 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM quay.io/biocontainers/fastp:0.23.4--hadf994f_2 +ENTRYPOINT [] +RUN fastp --version 2>&1 | sed 's# #: "#;s#$#"#' > /var/software_versions.txt + +LABEL org.opencontainers.image.authors="Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component fastp" +LABEL org.opencontainers.image.created="2025-03-06T09:19:40Z" +LABEL org.opencontainers.image.source="https://github.com/OpenGene/fastp" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "fastp main" @@ -747,404 +1145,32 @@ function ViashHelp { echo " min: 1" echo " One in (--overrepresentation_sampling) reads will be computed for" echo " overrepresentation analysis (1~10000), smaller is slower, default is 20." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? : whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM quay.io/biocontainers/fastp:0.23.4--hadf994f_2 -ENTRYPOINT [] -RUN fastp --version 2>&1 | sed 's# #: "#;s#$#"#' > /var/software_versions.txt - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component fastp" -LABEL org.opencontainers.image.created="2024-12-03T10:34:04Z" -LABEL org.opencontainers.image.source="https://github.com/OpenGene/fastp" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/fastqc/.config.vsh.yaml b/target/executable/fastqc/.config.vsh.yaml index b2e14f8a..43ae634a 100644 --- a/target/executable/fastqc/.config.vsh.yaml +++ b/target/executable/fastqc/.config.vsh.yaml @@ -239,6 +239,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -339,16 +342,16 @@ build_info: engine: "docker|native" output: "target/executable/fastqc" executable: "target/executable/fastqc/fastqc" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/fastqc/fastqc b/target/executable/fastqc/fastqc index aba1d846..d8b185ba 100755 --- a/target/executable/fastqc/fastqc +++ b/target/executable/fastqc/fastqc @@ -2,7 +2,7 @@ # fastqc main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,155 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "fastqc main" - echo "" - echo "FastQC - A high throughput sequence QC analysis tool." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: input.fq" - echo " FASTQ file(s) to be analyzed." - echo "" - echo "Outputs:" - echo " At least one of the output options (--html, --zip, --summary, --data) must" - echo " be used." - echo "" - echo " --html" - echo " type: file, multiple values allowed, output, file must exist" - echo " example: *.html" - echo " Create the HTML report of the results." - echo " '*' wild card must be provided in the output file name." - echo " Wild card will be replaced by the input file basename." - echo " e.g." - echo " --input \"sample_1.fq\"" - echo " --html \"*.html\"" - echo " would create an output html file named sample_1.html" - echo "" - echo " --zip" - echo " type: file, multiple values allowed, output, file must exist" - echo " example: *.zip" - echo " Create the zip file(s) containing: html report, data, images, icons," - echo " summary, etc." - echo " '*' wild card must be provided in the output file name." - echo " Wild card will be replaced by the input basename." - echo " e.g." - echo " --input \"sample_1.fq\"" - echo " --html \"*.zip\"" - echo " would create an output zip file named sample_1.zip" - echo "" - echo " --summary" - echo " type: file, multiple values allowed, output, file must exist" - echo " example: *_summary.txt" - echo " Create the summary file(s)." - echo " '*' wild card must be provided in the output file name." - echo " Wild card will be replaced by the input basename." - echo " e.g." - echo " --input \"sample_1.fq\"" - echo " --summary \"*_summary.txt\"" - echo " would create an output summary.txt file named sample_1_summary.txt" - echo "" - echo " --data" - echo " type: file, multiple values allowed, output, file must exist" - echo " example: *_data.txt" - echo " Create the data file(s)." - echo " '*' wild card must be provided in the output file name." - echo " Wild card will be replaced by the input basename." - echo " e.g." - echo " --input \"sample_1.fq\"" - echo " --summary \"*_data.txt\"" - echo " would create an output data.txt file named sample_1_data.txt" - echo "" - echo "Options:" - echo " --casava" - echo " type: boolean_true" - echo " Files come from raw casava output. Files in the same sample" - echo " group (differing only by the group number) will be analysed" - echo " as a set rather than individually. Sequences with the filter" - echo " flag set in the header will be excluded from the analysis." - echo " Files must have the same names given to them by casava" - echo " (including being gzipped and ending with .gz) otherwise they" - echo " won't be grouped together correctly." - echo "" - echo " --nano" - echo " type: boolean_true" - echo " Files come from nanopore sequences and are in fast5 format. In" - echo " this mode you can pass in directories to process and the program" - echo " will take in all fast5 files within those directories and produce" - echo " a single output file from the sequences found in all files." - echo "" - echo " --nofilter" - echo " type: boolean_true" - echo " If running with --casava then don't remove read flagged by" - echo " casava as poor quality when performing the QC analysis." - echo "" - echo " --nogroup" - echo " type: boolean_true" - echo " Disable grouping of bases for reads >50bp." - echo " All reports will show data for every base in the read." - echo " WARNING: Using this option will cause fastqc to crash" - echo " and burn if you use it on really long reads, and your" - echo " plots may end up a ridiculous size. You have been warned!" - echo "" - echo " --min_length" - echo " type: integer" - echo " example: 0" - echo " Sets an artificial lower limit on the length of the" - echo " sequence to be shown in the report. As long as you" - echo " set this to a value greater or equal to your longest" - echo " read length then this will be the sequence length used" - echo " to create your read groups. This can be useful for making" - echo " directly comparable statistics from datasets with somewhat" - echo " variable read lengths." - echo "" - echo " -f, --format" - echo " type: string" - echo " example: bam" - echo " Bypasses the normal sequence file format detection and" - echo " forces the program to use the specified format." - echo " Valid formats are bam, sam, bam_mapped, sam_mapped, and fastq." - echo "" - echo " -c, --contaminants" - echo " type: file, file must exist" - echo " example: contaminants.txt" - echo " Specifies a non-default file which contains the list" - echo " of contaminants to screen overrepresented sequences against." - echo " The file must contain sets of named contaminants in the form" - echo " name[tab]sequence. Lines prefixed with a hash will be ignored." - echo "" - echo " -a, --adapters" - echo " type: file, file must exist" - echo " example: adapters.txt" - echo " Specifies a non-default file which contains the list of" - echo " adapter sequences which will be explicitly searched against" - echo " the library. The file must contain sets of named adapters" - echo " in the form name[tab]sequence. Lines prefixed with a hash will be" - echo " ignored." - echo "" - echo " -l, --limits" - echo " type: file, file must exist" - echo " example: limits.txt" - echo " Specifies a non-default file which contains" - echo " a set of criteria which will be used to determine" - echo " the warn/error limits for the various modules." - echo " This file can also be used to selectively remove" - echo " some modules from the output altogether. The format" - echo " needs to mirror the default limits.txt file found in" - echo " the Configuration folder." - echo "" - echo " -k, --kmers" - echo " type: integer" - echo " example: 7" - echo " Specifies the length of Kmer to look for in the Kmer" - echo " content module. Specified Kmer length must be between" - echo " 2 and 10. Default length is 7 if not specified." - echo "" - echo " -q, --quiet" - echo " type: boolean_true" - echo " Suppress all progress messages on stdout and only report errors." -} # initialise variables VIASH_MODE='run' @@ -601,9 +452,9 @@ RUN echo "fastqc: $(fastqc --version | sed -n 's/^FastQC //p')" > /var/software_ LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component fastqc" -LABEL org.opencontainers.image.created="2024-12-03T10:34:06Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:44Z" LABEL org.opencontainers.image.source="https://github.com/s-andrews/FastQC" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -718,6 +569,181 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "fastqc main" + echo "" + echo "FastQC - A high throughput sequence QC analysis tool." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example: input.fq" + echo " FASTQ file(s) to be analyzed." + echo "" + echo "Outputs:" + echo " At least one of the output options (--html, --zip, --summary, --data) must" + echo " be used." + echo "" + echo " --html" + echo " type: file, multiple values allowed, output, file must exist" + echo " example: *.html" + echo " Create the HTML report of the results." + echo " '*' wild card must be provided in the output file name." + echo " Wild card will be replaced by the input file basename." + echo " e.g." + echo " --input \"sample_1.fq\"" + echo " --html \"*.html\"" + echo " would create an output html file named sample_1.html" + echo "" + echo " --zip" + echo " type: file, multiple values allowed, output, file must exist" + echo " example: *.zip" + echo " Create the zip file(s) containing: html report, data, images, icons," + echo " summary, etc." + echo " '*' wild card must be provided in the output file name." + echo " Wild card will be replaced by the input basename." + echo " e.g." + echo " --input \"sample_1.fq\"" + echo " --html \"*.zip\"" + echo " would create an output zip file named sample_1.zip" + echo "" + echo " --summary" + echo " type: file, multiple values allowed, output, file must exist" + echo " example: *_summary.txt" + echo " Create the summary file(s)." + echo " '*' wild card must be provided in the output file name." + echo " Wild card will be replaced by the input basename." + echo " e.g." + echo " --input \"sample_1.fq\"" + echo " --summary \"*_summary.txt\"" + echo " would create an output summary.txt file named sample_1_summary.txt" + echo "" + echo " --data" + echo " type: file, multiple values allowed, output, file must exist" + echo " example: *_data.txt" + echo " Create the data file(s)." + echo " '*' wild card must be provided in the output file name." + echo " Wild card will be replaced by the input basename." + echo " e.g." + echo " --input \"sample_1.fq\"" + echo " --summary \"*_data.txt\"" + echo " would create an output data.txt file named sample_1_data.txt" + echo "" + echo "Options:" + echo " --casava" + echo " type: boolean_true" + echo " Files come from raw casava output. Files in the same sample" + echo " group (differing only by the group number) will be analysed" + echo " as a set rather than individually. Sequences with the filter" + echo " flag set in the header will be excluded from the analysis." + echo " Files must have the same names given to them by casava" + echo " (including being gzipped and ending with .gz) otherwise they" + echo " won't be grouped together correctly." + echo "" + echo " --nano" + echo " type: boolean_true" + echo " Files come from nanopore sequences and are in fast5 format. In" + echo " this mode you can pass in directories to process and the program" + echo " will take in all fast5 files within those directories and produce" + echo " a single output file from the sequences found in all files." + echo "" + echo " --nofilter" + echo " type: boolean_true" + echo " If running with --casava then don't remove read flagged by" + echo " casava as poor quality when performing the QC analysis." + echo "" + echo " --nogroup" + echo " type: boolean_true" + echo " Disable grouping of bases for reads >50bp." + echo " All reports will show data for every base in the read." + echo " WARNING: Using this option will cause fastqc to crash" + echo " and burn if you use it on really long reads, and your" + echo " plots may end up a ridiculous size. You have been warned!" + echo "" + echo " --min_length" + echo " type: integer" + echo " example: 0" + echo " Sets an artificial lower limit on the length of the" + echo " sequence to be shown in the report. As long as you" + echo " set this to a value greater or equal to your longest" + echo " read length then this will be the sequence length used" + echo " to create your read groups. This can be useful for making" + echo " directly comparable statistics from datasets with somewhat" + echo " variable read lengths." + echo "" + echo " -f, --format" + echo " type: string" + echo " example: bam" + echo " Bypasses the normal sequence file format detection and" + echo " forces the program to use the specified format." + echo " Valid formats are bam, sam, bam_mapped, sam_mapped, and fastq." + echo "" + echo " -c, --contaminants" + echo " type: file, file must exist" + echo " example: contaminants.txt" + echo " Specifies a non-default file which contains the list" + echo " of contaminants to screen overrepresented sequences against." + echo " The file must contain sets of named contaminants in the form" + echo " name[tab]sequence. Lines prefixed with a hash will be ignored." + echo "" + echo " -a, --adapters" + echo " type: file, file must exist" + echo " example: adapters.txt" + echo " Specifies a non-default file which contains the list of" + echo " adapter sequences which will be explicitly searched against" + echo " the library. The file must contain sets of named adapters" + echo " in the form name[tab]sequence. Lines prefixed with a hash will be" + echo " ignored." + echo "" + echo " -l, --limits" + echo " type: file, file must exist" + echo " example: limits.txt" + echo " Specifies a non-default file which contains" + echo " a set of criteria which will be used to determine" + echo " the warn/error limits for the various modules." + echo " This file can also be used to selectively remove" + echo " some modules from the output altogether. The format" + echo " needs to mirror the default limits.txt file found in" + echo " the Configuration folder." + echo "" + echo " -k, --kmers" + echo " type: integer" + echo " example: 7" + echo " Specifies the length of Kmer to look for in the Kmer" + echo " content module. Specified Kmer length must be between" + echo " 2 and 10. Default length is 7 if not specified." + echo "" + echo " -q, --quiet" + echo " type: boolean_true" + echo " Suppress all progress messages on stdout and only report errors." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/featurecounts/.config.vsh.yaml b/target/executable/featurecounts/.config.vsh.yaml index b8c2cfcd..ebac86cd 100644 --- a/target/executable/featurecounts/.config.vsh.yaml +++ b/target/executable/featurecounts/.config.vsh.yaml @@ -543,6 +543,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -644,16 +647,16 @@ build_info: engine: "docker|native" output: "target/executable/featurecounts" executable: "target/executable/featurecounts/featurecounts" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/featurecounts/featurecounts b/target/executable/featurecounts/featurecounts index 3d0983b9..0980c026 100755 --- a/target/executable/featurecounts/featurecounts +++ b/target/executable/featurecounts/featurecounts @@ -2,7 +2,7 @@ # featurecounts main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,6 +172,404 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM quay.io/biocontainers/subread:2.0.6--he4a0461_0 +ENTRYPOINT [] +RUN featureCounts -v 2>&1 | sed 's/featureCounts v\([0-9.]*\)/featureCounts: \1/' > /var/software_versions.txt + +LABEL org.opencontainers.image.authors="Sai Nirmayi Yasa" +LABEL org.opencontainers.image.description="Companion container for running component featurecounts" +LABEL org.opencontainers.image.created="2025-03-06T09:19:35Z" +LABEL org.opencontainers.image.source="https://github.com/ShiLab-Bioinformatics/subread" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "featurecounts main" @@ -473,404 +871,32 @@ function ViashHelp { echo " type: boolean_true" echo " Output verbose information for debugging, such as un-matched" echo " chromosome/contig names." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? : whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM quay.io/biocontainers/subread:2.0.6--he4a0461_0 -ENTRYPOINT [] -RUN featureCounts -v 2>&1 | sed 's/featureCounts v\([0-9.]*\)/featureCounts: \1/' > /var/software_versions.txt - -LABEL org.opencontainers.image.authors="Sai Nirmayi Yasa" -LABEL org.opencontainers.image.description="Companion container for running component featurecounts" -LABEL org.opencontainers.image.created="2024-12-03T10:33:51Z" -LABEL org.opencontainers.image.source="https://github.com/ShiLab-Bioinformatics/subread" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/fq_subsample/.config.vsh.yaml b/target/executable/fq_subsample/.config.vsh.yaml index ef40cbf3..e432c498 100644 --- a/target/executable/fq_subsample/.config.vsh.yaml +++ b/target/executable/fq_subsample/.config.vsh.yaml @@ -88,6 +88,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -189,16 +192,16 @@ build_info: engine: "docker|native" output: "target/executable/fq_subsample" executable: "target/executable/fq_subsample/fq_subsample" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/fq_subsample/fq_subsample b/target/executable/fq_subsample/fq_subsample index 9c0de661..9c3264ec 100755 --- a/target/executable/fq_subsample/fq_subsample +++ b/target/executable/fq_subsample/fq_subsample @@ -2,7 +2,7 @@ # fq_subsample main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,46 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "fq_subsample main" - echo "" - echo "fq subsample outputs a subset of records from single or paired FASTQ files." - echo "" - echo "Input:" - echo " --input_1" - echo " type: file, required parameter, file must exist" - echo " First input fastq file to subsample. Accepts both raw and gzipped FASTQ" - echo " inputs." - echo "" - echo " --input_2" - echo " type: file, file must exist" - echo " Second input fastq files to subsample. Accepts both raw and gzipped" - echo " FASTQ inputs." - echo "" - echo "Output:" - echo " --output_1" - echo " type: file, output, file must exist" - echo " Sampled read 1 fastq files. Output will be gzipped if ends in \`.gz\`." - echo "" - echo " --output_2" - echo " type: file, output, file must exist" - echo " Sampled read 2 fastq files. Output will be gzipped if ends in \`.gz\`." - echo "" - echo "Options:" - echo " --probability" - echo " type: double" - echo " The probability a record is kept, as a percentage (0.0, 1.0). Cannot be" - echo " used with \`record-count\`" - echo "" - echo " --record_count" - echo " type: integer" - echo " The exact number of records to keep. Cannot be used with \`probability\`" - echo "" - echo " --seed" - echo " type: integer" - echo " Seed to use for the random number generator" -} # initialise variables VIASH_MODE='run' @@ -493,9 +453,9 @@ mv target/release/fq /usr/local/bin/ && \ cd / && rm -rf /fq LABEL org.opencontainers.image.description="Companion container for running component fq_subsample" -LABEL org.opencontainers.image.created="2024-12-03T10:33:59Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:44Z" LABEL org.opencontainers.image.source="https://github.com/stjude-rust-labs/fq" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -610,6 +570,72 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "fq_subsample main" + echo "" + echo "fq subsample outputs a subset of records from single or paired FASTQ files." + echo "" + echo "Input:" + echo " --input_1" + echo " type: file, required parameter, file must exist" + echo " First input fastq file to subsample. Accepts both raw and gzipped FASTQ" + echo " inputs." + echo "" + echo " --input_2" + echo " type: file, file must exist" + echo " Second input fastq files to subsample. Accepts both raw and gzipped" + echo " FASTQ inputs." + echo "" + echo "Output:" + echo " --output_1" + echo " type: file, output, file must exist" + echo " Sampled read 1 fastq files. Output will be gzipped if ends in \`.gz\`." + echo "" + echo " --output_2" + echo " type: file, output, file must exist" + echo " Sampled read 2 fastq files. Output will be gzipped if ends in \`.gz\`." + echo "" + echo "Options:" + echo " --probability" + echo " type: double" + echo " The probability a record is kept, as a percentage (0.0, 1.0). Cannot be" + echo " used with \`record-count\`" + echo "" + echo " --record_count" + echo " type: integer" + echo " The exact number of records to keep. Cannot be used with \`probability\`" + echo "" + echo " --seed" + echo " type: integer" + echo " Seed to use for the random number generator" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/gffread/.config.vsh.yaml b/target/executable/gffread/.config.vsh.yaml index 6f7ecbfe..8329026d 100644 --- a/target/executable/gffread/.config.vsh.yaml +++ b/target/executable/gffread/.config.vsh.yaml @@ -582,6 +582,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -684,16 +687,16 @@ build_info: engine: "docker|native" output: "target/executable/gffread" executable: "target/executable/gffread/gffread" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/gffread/gffread b/target/executable/gffread/gffread index bedae97f..b23177a7 100755 --- a/target/executable/gffread/gffread +++ b/target/executable/gffread/gffread @@ -2,7 +2,7 @@ # gffread main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,6 +172,404 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM quay.io/biocontainers/gffread:0.12.7--hdcf5f25_3 +ENTRYPOINT [] +RUN echo "gffread: \"$(gffread --version 2>&1)\"" > /var/software_versions.txt + +LABEL org.opencontainers.image.authors="Emma Rousseau" +LABEL org.opencontainers.image.description="Companion container for running component gffread" +LABEL org.opencontainers.image.created="2025-03-06T09:19:42Z" +LABEL org.opencontainers.image.source="https://github.com/gpertea/gffread" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "gffread main" @@ -526,404 +924,32 @@ function ViashHelp { echo " For --merge option, enforce --no_boundary but also discard overlapping" echo " single-exon transcripts," echo " even on the opposite strand (can be combined with --rm_redudant)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? : whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM quay.io/biocontainers/gffread:0.12.7--hdcf5f25_3 -ENTRYPOINT [] -RUN echo "gffread: \"$(gffread --version 2>&1)\"" > /var/software_versions.txt - -LABEL org.opencontainers.image.authors="Emma Rousseau" -LABEL org.opencontainers.image.description="Companion container for running component gffread" -LABEL org.opencontainers.image.created="2024-12-03T10:33:54Z" -LABEL org.opencontainers.image.source="https://github.com/gpertea/gffread" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/kallisto/kallisto_index/.config.vsh.yaml b/target/executable/kallisto/kallisto_index/.config.vsh.yaml index 702b8703..8f7e7725 100644 --- a/target/executable/kallisto/kallisto_index/.config.vsh.yaml +++ b/target/executable/kallisto/kallisto_index/.config.vsh.yaml @@ -113,6 +113,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -217,16 +220,16 @@ build_info: engine: "docker|native" output: "target/executable/kallisto/kallisto_index" executable: "target/executable/kallisto/kallisto_index/kallisto_index" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/kallisto/kallisto_index/kallisto_index b/target/executable/kallisto/kallisto_index/kallisto_index index 838ec13f..13e01f32 100755 --- a/target/executable/kallisto/kallisto_index/kallisto_index +++ b/target/executable/kallisto/kallisto_index/kallisto_index @@ -2,7 +2,7 @@ # kallisto_index main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,60 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "kallisto_index main" - echo "" - echo "Build a Kallisto index for the transcriptome to use Kallisto in the" - echo "mapping-based mode." - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " Path to a FASTA-file containing the transcriptome sequences, either in" - echo " plain text or" - echo " compressed (.gz) format." - echo "" - echo " --d_list" - echo " type: file, file must exist" - echo " Path to a FASTA-file containing sequences to mask from quantification." - echo "" - echo "Output:" - echo " --index" - echo " type: file, output, file must exist" - echo " example: Kallisto_index" - echo "" - echo "Options:" - echo " --kmer_size" - echo " type: integer" - echo " example: 31" - echo " Kmer length passed to indexing step of pseudoaligners (default: '31')." - echo "" - echo " --make_unique" - echo " type: boolean_true" - echo " Replace repeated target names with unique names." - echo "" - echo " --aa" - echo " type: boolean_true" - echo " Generate index from a FASTA-file containing amino acid sequences." - echo "" - echo " --distiguish" - echo " type: boolean_true" - echo " Generate index where sequences are distinguished by the sequence names." - echo "" - echo " -m, --min_size" - echo " type: integer" - echo " Length of minimizers (default: automatically chosen)." - echo "" - echo " -e, --ec_max_size" - echo " type: integer" - echo " Maximum number of targets in an equivalence class (default: no maximum)." - echo "" - echo " -T, --tmp" - echo " type: string" - echo " example: tmp" - echo " Path to a directory for temporary files." -} # initialise variables VIASH_MODE='run' @@ -506,9 +452,9 @@ tar -xzf kallisto_linux-v0.50.1.tar.gz && \ mv kallisto/kallisto /usr/local/bin/ LABEL org.opencontainers.image.description="Companion container for running component kallisto kallisto_index" -LABEL org.opencontainers.image.created="2024-12-03T10:33:53Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:45Z" LABEL org.opencontainers.image.source="https://github.com/pachterlab/kallisto" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -623,6 +569,86 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "kallisto_index main" + echo "" + echo "Build a Kallisto index for the transcriptome to use Kallisto in the" + echo "mapping-based mode." + echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " Path to a FASTA-file containing the transcriptome sequences, either in" + echo " plain text or" + echo " compressed (.gz) format." + echo "" + echo " --d_list" + echo " type: file, file must exist" + echo " Path to a FASTA-file containing sequences to mask from quantification." + echo "" + echo "Output:" + echo " --index" + echo " type: file, output, file must exist" + echo " example: Kallisto_index" + echo "" + echo "Options:" + echo " --kmer_size" + echo " type: integer" + echo " example: 31" + echo " Kmer length passed to indexing step of pseudoaligners (default: '31')." + echo "" + echo " --make_unique" + echo " type: boolean_true" + echo " Replace repeated target names with unique names." + echo "" + echo " --aa" + echo " type: boolean_true" + echo " Generate index from a FASTA-file containing amino acid sequences." + echo "" + echo " --distiguish" + echo " type: boolean_true" + echo " Generate index where sequences are distinguished by the sequence names." + echo "" + echo " -m, --min_size" + echo " type: integer" + echo " Length of minimizers (default: automatically chosen)." + echo "" + echo " -e, --ec_max_size" + echo " type: integer" + echo " Maximum number of targets in an equivalence class (default: no maximum)." + echo "" + echo " -T, --tmp" + echo " type: string" + echo " example: tmp" + echo " Path to a directory for temporary files." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/kallisto/kallisto_quant/.config.vsh.yaml b/target/executable/kallisto/kallisto_quant/.config.vsh.yaml index 9e8d6ab2..a7905ede 100644 --- a/target/executable/kallisto/kallisto_quant/.config.vsh.yaml +++ b/target/executable/kallisto/kallisto_quant/.config.vsh.yaml @@ -137,6 +137,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -245,16 +248,16 @@ build_info: engine: "docker|native" output: "target/executable/kallisto/kallisto_quant" executable: "target/executable/kallisto/kallisto_quant/kallisto_quant" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/kallisto/kallisto_quant/kallisto_quant b/target/executable/kallisto/kallisto_quant/kallisto_quant index 46f7605e..49416512 100755 --- a/target/executable/kallisto/kallisto_quant/kallisto_quant +++ b/target/executable/kallisto/kallisto_quant/kallisto_quant @@ -2,7 +2,7 @@ # kallisto_quant main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,74 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "kallisto_quant main" - echo "" - echo "Quantifying abundances of transcripts from RNA-Seq data, or more generally of" - echo "target sequences using high-throughput sequencing reads." - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " List of input FastQ files of size 1 and 2 for single-end and paired-end" - echo " data, respectively." - echo "" - echo " -i, --index" - echo " type: file, required parameter, file must exist" - echo " Kallisto genome index." - echo "" - echo "Output:" - echo " -o, --output_dir" - echo " type: file, required parameter, output, file must exist" - echo " Directory to write output to." - echo "" - echo " --log" - echo " type: file, output, file must exist" - echo " File containing log information from running kallisto quant" - echo "" - echo "Options:" - echo " --single" - echo " type: boolean_true" - echo " Single end mode." - echo "" - echo " --single_overhang" - echo " type: boolean_true" - echo " Include reads where unobserved rest of fragment is predicted to lie" - echo " outside a transcript." - echo "" - echo " --fr_stranded" - echo " type: boolean_true" - echo " Strand specific reads, first read forward." - echo "" - echo " --rf_stranded" - echo " type: boolean_true" - echo " Strand specific reads, first read reverse." - echo "" - echo " -l, --fragment_length" - echo " type: double" - echo " The estimated average fragment length." - echo "" - echo " -s, --sd" - echo " type: double" - echo " The estimated standard deviation of the fragment length (default: -l, -s" - echo " values are estimated" - echo " from paired end data, but are required when using --single)." - echo "" - echo " --plaintext" - echo " type: boolean_true" - echo " Output plaintext instead of HDF5." - echo "" - echo " -b, --bootstrap_samples" - echo " type: integer" - echo " example: 0" - echo " Number of bootstrap samples to draw. Default: '0'" - echo "" - echo " --seed" - echo " type: integer" - echo " example: 42" - echo " Random seed for bootstrap. Default: '42'" -} # initialise variables VIASH_MODE='run' @@ -522,9 +454,9 @@ mv kallisto/kallisto /usr/local/bin/ RUN echo "kallisto: $(kallisto version | sed 's/kallisto, version //')" > /var/software_versions.txt LABEL org.opencontainers.image.description="Companion container for running component kallisto kallisto_quant" -LABEL org.opencontainers.image.created="2024-12-03T10:33:54Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:44Z" LABEL org.opencontainers.image.source="https://github.com/pachterlab/kallisto" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -639,6 +571,100 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "kallisto_quant main" + echo "" + echo "Quantifying abundances of transcripts from RNA-Seq data, or more generally of" + echo "target sequences using high-throughput sequencing reads." + echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " List of input FastQ files of size 1 and 2 for single-end and paired-end" + echo " data, respectively." + echo "" + echo " -i, --index" + echo " type: file, required parameter, file must exist" + echo " Kallisto genome index." + echo "" + echo "Output:" + echo " -o, --output_dir" + echo " type: file, required parameter, output, file must exist" + echo " Directory to write output to." + echo "" + echo " --log" + echo " type: file, output, file must exist" + echo " File containing log information from running kallisto quant" + echo "" + echo "Options:" + echo " --single" + echo " type: boolean_true" + echo " Single end mode." + echo "" + echo " --single_overhang" + echo " type: boolean_true" + echo " Include reads where unobserved rest of fragment is predicted to lie" + echo " outside a transcript." + echo "" + echo " --fr_stranded" + echo " type: boolean_true" + echo " Strand specific reads, first read forward." + echo "" + echo " --rf_stranded" + echo " type: boolean_true" + echo " Strand specific reads, first read reverse." + echo "" + echo " -l, --fragment_length" + echo " type: double" + echo " The estimated average fragment length." + echo "" + echo " -s, --sd" + echo " type: double" + echo " The estimated standard deviation of the fragment length (default: -l, -s" + echo " values are estimated" + echo " from paired end data, but are required when using --single)." + echo "" + echo " --plaintext" + echo " type: boolean_true" + echo " Output plaintext instead of HDF5." + echo "" + echo " -b, --bootstrap_samples" + echo " type: integer" + echo " example: 0" + echo " Number of bootstrap samples to draw. Default: '0'" + echo "" + echo " --seed" + echo " type: integer" + echo " example: 42" + echo " Random seed for bootstrap. Default: '42'" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/lofreq/lofreq_call/.config.vsh.yaml b/target/executable/lofreq/lofreq_call/.config.vsh.yaml index 13dc43f8..6b60791a 100644 --- a/target/executable/lofreq/lofreq_call/.config.vsh.yaml +++ b/target/executable/lofreq/lofreq_call/.config.vsh.yaml @@ -403,6 +403,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -506,16 +509,16 @@ build_info: engine: "docker|native" output: "target/executable/lofreq/lofreq_call" executable: "target/executable/lofreq/lofreq_call/lofreq_call" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/lofreq/lofreq_call/lofreq_call b/target/executable/lofreq/lofreq_call/lofreq_call index a7b4e890..5520d8aa 100755 --- a/target/executable/lofreq/lofreq_call/lofreq_call +++ b/target/executable/lofreq/lofreq_call/lofreq_call @@ -2,7 +2,7 @@ # lofreq_call main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,209 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "lofreq_call main" - echo "" - echo "Call variants from a BAM file." - echo "" - echo "LoFreq* (i.e. LoFreq version 2) is a fast and sensitive variant-caller for" - echo "inferring SNVs and indels from next-generation sequencing data. It makes full" - echo "use of base-call qualities and other sources of errors inherent in sequencing" - echo "(e.g. mapping or base/indel alignment uncertainty), which are usually ignored by" - echo "other methods or only used for filtering." - echo "" - echo "LoFreq* can run on almost any type of aligned sequencing data (e.g. Illumina," - echo "IonTorrent or Pacbio) since no machine- or sequencing-technology dependent" - echo "thresholds are used. It automatically adapts to changes in coverage and" - echo "sequencing quality and can therefore be applied to a variety of data-sets e.g." - echo "viral/quasispecies, bacterial, metagenomics or somatic data." - echo "" - echo "LoFreq* is very sensitive; most notably, it is able to predict variants below" - echo "the average base-call quality (i.e. sequencing error rate). Each variant call is" - echo "assigned a p-value which allows for rigorous false positive control. Even though" - echo "it uses no approximations or heuristics, it is very efficient due to several" - echo "runtime optimizations and also provides a (pseudo-)parallel implementation." - echo "LoFreq* is generic and fast enough to be applied to high-coverage data and large" - echo "genomes. On a single processor it takes a minute to analyze Dengue genome" - echo "sequencing data with nearly 4000X coverage, roughly one hour to call SNVs on a" - echo "600X coverage E.coli genome and also roughly an hour to run on a 100X coverage" - echo "human exome dataset." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: normal.bam" - echo " Input BAM file." - echo "" - echo " --input_bai" - echo " type: file, required parameter, file must exist" - echo " example: normal.bai" - echo " Index file for the input BAM file." - echo "" - echo " -f, --ref" - echo " type: file, required parameter, file must exist" - echo " example: reference.fasta" - echo " Indexed reference fasta file (gzip supported). Default: none." - echo "" - echo "Outputs:" - echo " -o, --out" - echo " type: file, required parameter, output, file must exist" - echo " example: output.vcf" - echo " Vcf output file. Default: stdout." - echo "" - echo "Arguments:" - echo " -r, --region" - echo " type: string" - echo " example: chr1:1000-2000" - echo " Limit calls to this region (chrom:start-end). Default: none." - echo "" - echo " -l, --bed" - echo " type: file, file must exist" - echo " example: regions.bed" - echo " List of positions (chr pos) or regions (BED). Default: none." - echo "" - echo " -q, --min_bq" - echo " type: integer" - echo " example: 6" - echo " Skip any base with baseQ smaller than INT. Default: 6." - echo "" - echo " -Q, --min_alt_bq" - echo " type: integer" - echo " example: 6" - echo " Skip alternate bases with baseQ smaller than INT. Default: 6." - echo "" - echo " -R, --def_alt_bq" - echo " type: integer" - echo " example: 0" - echo " Overwrite baseQs of alternate bases (that passed bq filter) with this" - echo " value (-1: use median ref-bq; 0: keep). Default: 0." - echo "" - echo " -j, --min_jq" - echo " type: integer" - echo " example: 0" - echo " Skip any base with joinedQ smaller than INT. Default: 0." - echo "" - echo " -J, --min_alt_jq" - echo " type: integer" - echo " example: 0" - echo " Skip alternate bases with joinedQ smaller than INT. Default: 0." - echo "" - echo " -K, --def_alt_jq" - echo " type: integer" - echo " example: 0" - echo " Overwrite joinedQs of alternate bases (that passed jq filter) with this" - echo " value (-1: use median ref-bq; 0: keep). Default: 0." - echo "" - echo " -B, --no_baq" - echo " type: boolean_true" - echo " Disable use of base-alignment quality (BAQ)." - echo "" - echo " -A, --no_idaq" - echo " type: boolean_true" - echo " Don't use IDAQ values (NOT recommended under ANY circumstances other" - echo " than debugging)." - echo "" - echo " -D, --del_baq" - echo " type: boolean_true" - echo " Delete pre-existing BAQ values, i.e. compute even if already present in" - echo " BAM." - echo "" - echo " -e, --no_ext_baq" - echo " type: boolean_true" - echo " Use 'normal' BAQ (samtools default) instead of extended BAQ (both" - echo " computed on the fly if not already present in lb tag)." - echo "" - echo " -m, --min_mq" - echo " type: integer" - echo " example: 0" - echo " Skip reads with mapping quality smaller than INT. Default: 0." - echo "" - echo " -M, --max_mq" - echo " type: integer" - echo " example: 255" - echo " Cap mapping quality at INT. Default: 255." - echo "" - echo " -N, --no_mq" - echo " type: boolean_true" - echo " Don't merge mapping quality in LoFreq's model." - echo "" - echo " --call_indels" - echo " type: boolean_true" - echo " Enable indel calls (note: preprocess your file to include indel" - echo " alignment qualities!)." - echo "" - echo " --only_indels" - echo " type: boolean_true" - echo " Only call indels; no SNVs." - echo "" - echo " -s, --src_qual" - echo " type: boolean_true" - echo " Enable computation of source quality." - echo "" - echo " -S, --ign_vcf" - echo " type: file, file must exist" - echo " example: variants.vcf" - echo " Ignore variants in this vcf file for source quality computation." - echo " Multiple files can be given separated by commas." - echo "" - echo " -T, --def_nm_q" - echo " type: integer" - echo " example: -1" - echo " If >= 0, then replace non-match base qualities with this default value." - echo " Default: -1." - echo "" - echo " -a, --sig" - echo " type: double" - echo " example: 0.01" - echo " P-Value cutoff / significance level. Default: 0.010000." - echo "" - echo " -b, --bonf" - echo " type: string" - echo " example: dynamic" - echo " Bonferroni factor. 'dynamic' (increase per actually performed test) or" - echo " INT. Default: Dynamic." - echo "" - echo " -C, --min_cov" - echo " type: integer" - echo " example: 1" - echo " Test only positions having at least this coverage. Default: 1." - echo " (note: without --no-default-filter default filters (incl. coverage) kick" - echo " in after predictions are done)." - echo "" - echo " -d, --max_depth" - echo " type: integer" - echo " example: 1000000" - echo " Cap coverage at this depth. Default: 1000000." - echo "" - echo " --illumina_13" - echo " type: boolean_true" - echo " Assume the quality is Illumina-1.3-1.7/ASCII+64 encoded." - echo "" - echo " --use_orphan" - echo " type: boolean_true" - echo " Count anomalous read pairs (i.e. where mate is not aligned properly)." - echo "" - echo " --plp_summary_only" - echo " type: boolean_true" - echo " No variant calling. Just output pileup summary per column." - echo "" - echo " --no_default_filter" - echo " type: boolean_true" - echo " Don't run default 'lofreq filter' automatically after calling variants." - echo "" - echo " --force_overwrite" - echo " type: boolean_true" - echo " Overwrite any existing output." - echo "" - echo " --verbose" - echo " type: boolean_true" - echo " Be verbose." - echo "" - echo " --debug" - echo " type: boolean_true" - echo " Enable debugging." -} # initialise variables VIASH_MODE='run' @@ -656,9 +453,9 @@ echo "lofreq: $version" > /var/software_versions.txt LABEL org.opencontainers.image.authors="Kai Waldrant" LABEL org.opencontainers.image.description="Companion container for running component lofreq lofreq_call" -LABEL org.opencontainers.image.created="2024-12-03T10:34:05Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:43Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/biobox" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -773,6 +570,235 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "lofreq_call main" + echo "" + echo "Call variants from a BAM file." + echo "" + echo "LoFreq* (i.e. LoFreq version 2) is a fast and sensitive variant-caller for" + echo "inferring SNVs and indels from next-generation sequencing data. It makes full" + echo "use of base-call qualities and other sources of errors inherent in sequencing" + echo "(e.g. mapping or base/indel alignment uncertainty), which are usually ignored by" + echo "other methods or only used for filtering." + echo "" + echo "LoFreq* can run on almost any type of aligned sequencing data (e.g. Illumina," + echo "IonTorrent or Pacbio) since no machine- or sequencing-technology dependent" + echo "thresholds are used. It automatically adapts to changes in coverage and" + echo "sequencing quality and can therefore be applied to a variety of data-sets e.g." + echo "viral/quasispecies, bacterial, metagenomics or somatic data." + echo "" + echo "LoFreq* is very sensitive; most notably, it is able to predict variants below" + echo "the average base-call quality (i.e. sequencing error rate). Each variant call is" + echo "assigned a p-value which allows for rigorous false positive control. Even though" + echo "it uses no approximations or heuristics, it is very efficient due to several" + echo "runtime optimizations and also provides a (pseudo-)parallel implementation." + echo "LoFreq* is generic and fast enough to be applied to high-coverage data and large" + echo "genomes. On a single processor it takes a minute to analyze Dengue genome" + echo "sequencing data with nearly 4000X coverage, roughly one hour to call SNVs on a" + echo "600X coverage E.coli genome and also roughly an hour to run on a 100X coverage" + echo "human exome dataset." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: normal.bam" + echo " Input BAM file." + echo "" + echo " --input_bai" + echo " type: file, required parameter, file must exist" + echo " example: normal.bai" + echo " Index file for the input BAM file." + echo "" + echo " -f, --ref" + echo " type: file, required parameter, file must exist" + echo " example: reference.fasta" + echo " Indexed reference fasta file (gzip supported). Default: none." + echo "" + echo "Outputs:" + echo " -o, --out" + echo " type: file, required parameter, output, file must exist" + echo " example: output.vcf" + echo " Vcf output file. Default: stdout." + echo "" + echo "Arguments:" + echo " -r, --region" + echo " type: string" + echo " example: chr1:1000-2000" + echo " Limit calls to this region (chrom:start-end). Default: none." + echo "" + echo " -l, --bed" + echo " type: file, file must exist" + echo " example: regions.bed" + echo " List of positions (chr pos) or regions (BED). Default: none." + echo "" + echo " -q, --min_bq" + echo " type: integer" + echo " example: 6" + echo " Skip any base with baseQ smaller than INT. Default: 6." + echo "" + echo " -Q, --min_alt_bq" + echo " type: integer" + echo " example: 6" + echo " Skip alternate bases with baseQ smaller than INT. Default: 6." + echo "" + echo " -R, --def_alt_bq" + echo " type: integer" + echo " example: 0" + echo " Overwrite baseQs of alternate bases (that passed bq filter) with this" + echo " value (-1: use median ref-bq; 0: keep). Default: 0." + echo "" + echo " -j, --min_jq" + echo " type: integer" + echo " example: 0" + echo " Skip any base with joinedQ smaller than INT. Default: 0." + echo "" + echo " -J, --min_alt_jq" + echo " type: integer" + echo " example: 0" + echo " Skip alternate bases with joinedQ smaller than INT. Default: 0." + echo "" + echo " -K, --def_alt_jq" + echo " type: integer" + echo " example: 0" + echo " Overwrite joinedQs of alternate bases (that passed jq filter) with this" + echo " value (-1: use median ref-bq; 0: keep). Default: 0." + echo "" + echo " -B, --no_baq" + echo " type: boolean_true" + echo " Disable use of base-alignment quality (BAQ)." + echo "" + echo " -A, --no_idaq" + echo " type: boolean_true" + echo " Don't use IDAQ values (NOT recommended under ANY circumstances other" + echo " than debugging)." + echo "" + echo " -D, --del_baq" + echo " type: boolean_true" + echo " Delete pre-existing BAQ values, i.e. compute even if already present in" + echo " BAM." + echo "" + echo " -e, --no_ext_baq" + echo " type: boolean_true" + echo " Use 'normal' BAQ (samtools default) instead of extended BAQ (both" + echo " computed on the fly if not already present in lb tag)." + echo "" + echo " -m, --min_mq" + echo " type: integer" + echo " example: 0" + echo " Skip reads with mapping quality smaller than INT. Default: 0." + echo "" + echo " -M, --max_mq" + echo " type: integer" + echo " example: 255" + echo " Cap mapping quality at INT. Default: 255." + echo "" + echo " -N, --no_mq" + echo " type: boolean_true" + echo " Don't merge mapping quality in LoFreq's model." + echo "" + echo " --call_indels" + echo " type: boolean_true" + echo " Enable indel calls (note: preprocess your file to include indel" + echo " alignment qualities!)." + echo "" + echo " --only_indels" + echo " type: boolean_true" + echo " Only call indels; no SNVs." + echo "" + echo " -s, --src_qual" + echo " type: boolean_true" + echo " Enable computation of source quality." + echo "" + echo " -S, --ign_vcf" + echo " type: file, file must exist" + echo " example: variants.vcf" + echo " Ignore variants in this vcf file for source quality computation." + echo " Multiple files can be given separated by commas." + echo "" + echo " -T, --def_nm_q" + echo " type: integer" + echo " example: -1" + echo " If >= 0, then replace non-match base qualities with this default value." + echo " Default: -1." + echo "" + echo " -a, --sig" + echo " type: double" + echo " example: 0.01" + echo " P-Value cutoff / significance level. Default: 0.010000." + echo "" + echo " -b, --bonf" + echo " type: string" + echo " example: dynamic" + echo " Bonferroni factor. 'dynamic' (increase per actually performed test) or" + echo " INT. Default: Dynamic." + echo "" + echo " -C, --min_cov" + echo " type: integer" + echo " example: 1" + echo " Test only positions having at least this coverage. Default: 1." + echo " (note: without --no-default-filter default filters (incl. coverage) kick" + echo " in after predictions are done)." + echo "" + echo " -d, --max_depth" + echo " type: integer" + echo " example: 1000000" + echo " Cap coverage at this depth. Default: 1000000." + echo "" + echo " --illumina_13" + echo " type: boolean_true" + echo " Assume the quality is Illumina-1.3-1.7/ASCII+64 encoded." + echo "" + echo " --use_orphan" + echo " type: boolean_true" + echo " Count anomalous read pairs (i.e. where mate is not aligned properly)." + echo "" + echo " --plp_summary_only" + echo " type: boolean_true" + echo " No variant calling. Just output pileup summary per column." + echo "" + echo " --no_default_filter" + echo " type: boolean_true" + echo " Don't run default 'lofreq filter' automatically after calling variants." + echo "" + echo " --force_overwrite" + echo " type: boolean_true" + echo " Overwrite any existing output." + echo "" + echo " --verbose" + echo " type: boolean_true" + echo " Be verbose." + echo "" + echo " --debug" + echo " type: boolean_true" + echo " Enable debugging." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/lofreq/lofreq_indelqual/.config.vsh.yaml b/target/executable/lofreq/lofreq_indelqual/.config.vsh.yaml index 3501690d..216c0ea3 100644 --- a/target/executable/lofreq/lofreq_indelqual/.config.vsh.yaml +++ b/target/executable/lofreq/lofreq_indelqual/.config.vsh.yaml @@ -109,6 +109,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -214,16 +217,16 @@ build_info: engine: "docker|native" output: "target/executable/lofreq/lofreq_indelqual" executable: "target/executable/lofreq/lofreq_indelqual/lofreq_indelqual" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/lofreq/lofreq_indelqual/lofreq_indelqual b/target/executable/lofreq/lofreq_indelqual/lofreq_indelqual index 387537d0..b612b1d5 100755 --- a/target/executable/lofreq/lofreq_indelqual/lofreq_indelqual +++ b/target/executable/lofreq/lofreq_indelqual/lofreq_indelqual @@ -2,7 +2,7 @@ # lofreq_indelqual main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,54 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "lofreq_indelqual main" - echo "" - echo "Insert indel qualities into BAM file (required for indel predictions)." - echo "" - echo "The preferred way of inserting indel qualities should be via GATK's BQSR (>=2)" - echo "If that's not possible, use this subcommand." - echo "The command has two modes: 'uniform' and 'dindel':" - echo "- 'uniform' will assign a given value uniformly, whereas" - echo "- 'dindel' will insert indel qualities based on Dindel (PMID 20980555)." - echo "Both will overwrite any existing values." - echo "Do not realign your BAM file afterwards!" - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: normal.bam" - echo " Input BAM file." - echo "" - echo " -f, --ref" - echo " type: file, file must exist" - echo " example: reference.fasta" - echo " Reference sequence used for mapping (Only required for --dindel)." - echo "" - echo "Outputs:" - echo " -o, --out" - echo " type: file, required parameter, output, file must exist" - echo " example: output.bam" - echo " Output BAM file." - echo "" - echo "Arguments:" - echo " -u, --uniform" - echo " type: string" - echo " example: 50,50" - echo " Add this indel quality uniformly to all bases. Use two comma separated" - echo " values to specify insertion and deletion quality separately. (clashes" - echo " with --dindel)." - echo "" - echo " --dindel" - echo " type: boolean_true" - echo " Add Dindel's indel qualities (Illumina specific) (clashes with -u; needs" - echo " --ref)." - echo "" - echo " --verbose" - echo " type: boolean_true" - echo " Be verbose." -} # initialise variables VIASH_MODE='run' @@ -501,9 +453,9 @@ echo "lofreq: $version" > /var/software_versions.txt LABEL org.opencontainers.image.authors="Kai Waldrant" LABEL org.opencontainers.image.description="Companion container for running component lofreq lofreq_indelqual" -LABEL org.opencontainers.image.created="2024-12-03T10:34:05Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:43Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/biobox" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -618,6 +570,80 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "lofreq_indelqual main" + echo "" + echo "Insert indel qualities into BAM file (required for indel predictions)." + echo "" + echo "The preferred way of inserting indel qualities should be via GATK's BQSR (>=2)" + echo "If that's not possible, use this subcommand." + echo "The command has two modes: 'uniform' and 'dindel':" + echo "- 'uniform' will assign a given value uniformly, whereas" + echo "- 'dindel' will insert indel qualities based on Dindel (PMID 20980555)." + echo "Both will overwrite any existing values." + echo "Do not realign your BAM file afterwards!" + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: normal.bam" + echo " Input BAM file." + echo "" + echo " -f, --ref" + echo " type: file, file must exist" + echo " example: reference.fasta" + echo " Reference sequence used for mapping (Only required for --dindel)." + echo "" + echo "Outputs:" + echo " -o, --out" + echo " type: file, required parameter, output, file must exist" + echo " example: output.bam" + echo " Output BAM file." + echo "" + echo "Arguments:" + echo " -u, --uniform" + echo " type: string" + echo " example: 50,50" + echo " Add this indel quality uniformly to all bases. Use two comma separated" + echo " values to specify insertion and deletion quality separately. (clashes" + echo " with --dindel)." + echo "" + echo " --dindel" + echo " type: boolean_true" + echo " Add Dindel's indel qualities (Illumina specific) (clashes with -u; needs" + echo " --ref)." + echo "" + echo " --verbose" + echo " type: boolean_true" + echo " Be verbose." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/multiqc/.config.vsh.yaml b/target/executable/multiqc/.config.vsh.yaml index 225831b9..8826f0ae 100644 --- a/target/executable/multiqc/.config.vsh.yaml +++ b/target/executable/multiqc/.config.vsh.yaml @@ -357,6 +357,9 @@ info: doi: "10.1093/bioinformatics/btw354" licence: "GPL v3 or later" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -455,16 +458,16 @@ build_info: engine: "docker|native" output: "target/executable/multiqc" executable: "target/executable/multiqc/multiqc" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/multiqc/multiqc b/target/executable/multiqc/multiqc index 3b25f831..debdcfd8 100755 --- a/target/executable/multiqc/multiqc +++ b/target/executable/multiqc/multiqc @@ -2,7 +2,7 @@ # multiqc main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,191 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "multiqc main" - echo "" - echo "MultiQC aggregates results from bioinformatics analyses across many samples into" - echo "a single report." - echo "It searches a given directory for analysis logs and compiles a HTML report. It's" - echo "a general use tool, perfect for summarising the output from numerous" - echo "bioinformatics tools." - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: data/results" - echo " File paths to be searched for analysis results to be included in the" - echo " report." - echo "" - echo "Ouput:" - echo " --output_report" - echo " type: file, output" - echo " example: multiqc_report.html" - echo " Filepath of the generated report." - echo "" - echo " --output_data" - echo " type: file, output" - echo " example: multiqc_data" - echo " Output directory for parsed data files. If not provided, parsed data" - echo " will not be published." - echo "" - echo " --output_plots" - echo " type: file, output" - echo " example: multiqc_plots" - echo " Output directory for generated plots. If not provided, plots will not be" - echo " published." - echo "" - echo "Modules and analyses to run:" - echo " --include_modules" - echo " type: string, multiple values allowed" - echo " example: fastqc;cutadapt" - echo " Use only these module" - echo "" - echo " --exclude_modules" - echo " type: string, multiple values allowed" - echo " example: fastqc;cutadapt" - echo " Do not use only these modules" - echo "" - echo " --ignore_analysis" - echo " type: string, multiple values allowed" - echo " example: run_one/*;run_two/*" - echo "" - echo " --ignore_samples" - echo " type: string, multiple values allowed" - echo " example: sample_1*;sample_3*" - echo "" - echo " --ignore_symlinks" - echo " type: boolean_true" - echo " Ignore symlinked directories and files" - echo "" - echo "Sample name handling:" - echo " --dirs" - echo " type: boolean_true" - echo " Prepend directory to sample names to avoid clashing filenames" - echo "" - echo " --dirs_depth" - echo " type: integer" - echo " Prepend n directories to sample names. Negative number to take from" - echo " start of path." - echo "" - echo " --full_names" - echo " type: boolean_true" - echo " Do not clean the sample names (leave as full file name)" - echo "" - echo " --fn_as_s_name" - echo " type: boolean_true" - echo " Use the log filename as the sample name" - echo "" - echo " --replace_names" - echo " type: file, file must exist" - echo " example: replace_names.tsv" - echo " TSV file to rename sample names during report generation" - echo "" - echo "Report Customisation:" - echo " --title" - echo " type: string" - echo " Report title. Printed as page header, used for filename if not otherwise" - echo " specified." - echo "" - echo " --comment" - echo " type: string" - echo " Custom comment, will be printed at the top of the report." - echo "" - echo " --template" - echo " type: string" - echo " choices: [ default, gathered, geo, highcharts, sections, simple ]" - echo " Report template to use." - echo "" - echo " --sample_names" - echo " type: file, file must exist" - echo " example: sample_names.tsv" - echo " TSV file containing alternative sample names for renaming buttons in the" - echo " report." - echo "" - echo " --sample_filters" - echo " type: file, file must exist" - echo " example: sample_filters.tsv" - echo " TSV file containing show/hide patterns for the report" - echo "" - echo " --custom_css_file" - echo " type: file, file must exist" - echo " example: custom_style_sheet.css" - echo " Custom CSS file to add to the final report" - echo "" - echo " --profile_runtime" - echo " type: boolean_true" - echo " Add analysis of how long MultiQC takes to run to the report" - echo "" - echo "MultiQC behaviour:" - echo " --verbose" - echo " type: boolean_true" - echo " Increase output verbosity." - echo "" - echo " --quiet" - echo " type: boolean_true" - echo " Only show log warnings" - echo "" - echo " --strict" - echo " type: boolean_true" - echo " Don't catch exceptions, run additional code checks to help development." - echo "" - echo " --development" - echo " type: boolean_true" - echo " Development mode. Do not compress and minimise JS, export uncompressed" - echo " plot data." - echo "" - echo " --require_logs" - echo " type: boolean_true" - echo " Require all explicitly requested modules to have log files. If not," - echo " MultiQC will exit with an error." - echo "" - echo " --no_megaqc_upload" - echo " type: boolean_true" - echo " Don't upload generated report to MegaQC, even if MegaQC options are" - echo " found." - echo "" - echo " --no_ansi" - echo " type: boolean_true" - echo " Disable coloured log output." - echo "" - echo " --cl_config" - echo " type: string" - echo " example: qualimap_config: { general_stats_coverage: [20,40,200] }" - echo " YAML formatted string that allows to customize MultiQC behaviour like" - echo " input file detection." - echo "" - echo "Output format:" - echo " --flat" - echo " type: boolean_true" - echo " Use only flat plots (static images)." - echo "" - echo " --interactive" - echo " type: boolean_true" - echo " Use only interactive plots (in-browser Javascript)." - echo "" - echo " --data_dir" - echo " type: boolean_true" - echo " Force the parsed data directory to be created." - echo "" - echo " --no_data_dir" - echo " type: boolean_true" - echo " Prevent the parsed data directory from being created." - echo "" - echo " --zip_data_dir" - echo " type: boolean_true" - echo " Compress the data directory." - echo "" - echo " --data_format" - echo " type: string" - echo " choices: [ tsv, csv, json, yaml ]" - echo " Output parsed data in a different format than the default 'txt'." - echo "" - echo " --pdf" - echo " type: boolean_true" - echo " Creates PDF report with the 'simple' template. Requires Pandoc to be" - echo " installed." -} # initialise variables VIASH_MODE='run' @@ -637,9 +452,9 @@ RUN multiqc --version | sed 's/multiqc, version\s\(.*\)/multiqc: "\1"/' > /var/s LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component multiqc" -LABEL org.opencontainers.image.created="2024-12-03T10:33:55Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:44Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/biobox" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -754,6 +569,217 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "multiqc main" + echo "" + echo "MultiQC aggregates results from bioinformatics analyses across many samples into" + echo "a single report." + echo "It searches a given directory for analysis logs and compiles a HTML report. It's" + echo "a general use tool, perfect for summarising the output from numerous" + echo "bioinformatics tools." + echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example: data/results" + echo " File paths to be searched for analysis results to be included in the" + echo " report." + echo "" + echo "Ouput:" + echo " --output_report" + echo " type: file, output" + echo " example: multiqc_report.html" + echo " Filepath of the generated report." + echo "" + echo " --output_data" + echo " type: file, output" + echo " example: multiqc_data" + echo " Output directory for parsed data files. If not provided, parsed data" + echo " will not be published." + echo "" + echo " --output_plots" + echo " type: file, output" + echo " example: multiqc_plots" + echo " Output directory for generated plots. If not provided, plots will not be" + echo " published." + echo "" + echo "Modules and analyses to run:" + echo " --include_modules" + echo " type: string, multiple values allowed" + echo " example: fastqc;cutadapt" + echo " Use only these module" + echo "" + echo " --exclude_modules" + echo " type: string, multiple values allowed" + echo " example: fastqc;cutadapt" + echo " Do not use only these modules" + echo "" + echo " --ignore_analysis" + echo " type: string, multiple values allowed" + echo " example: run_one/*;run_two/*" + echo "" + echo " --ignore_samples" + echo " type: string, multiple values allowed" + echo " example: sample_1*;sample_3*" + echo "" + echo " --ignore_symlinks" + echo " type: boolean_true" + echo " Ignore symlinked directories and files" + echo "" + echo "Sample name handling:" + echo " --dirs" + echo " type: boolean_true" + echo " Prepend directory to sample names to avoid clashing filenames" + echo "" + echo " --dirs_depth" + echo " type: integer" + echo " Prepend n directories to sample names. Negative number to take from" + echo " start of path." + echo "" + echo " --full_names" + echo " type: boolean_true" + echo " Do not clean the sample names (leave as full file name)" + echo "" + echo " --fn_as_s_name" + echo " type: boolean_true" + echo " Use the log filename as the sample name" + echo "" + echo " --replace_names" + echo " type: file, file must exist" + echo " example: replace_names.tsv" + echo " TSV file to rename sample names during report generation" + echo "" + echo "Report Customisation:" + echo " --title" + echo " type: string" + echo " Report title. Printed as page header, used for filename if not otherwise" + echo " specified." + echo "" + echo " --comment" + echo " type: string" + echo " Custom comment, will be printed at the top of the report." + echo "" + echo " --template" + echo " type: string" + echo " choices: [ default, gathered, geo, highcharts, sections, simple ]" + echo " Report template to use." + echo "" + echo " --sample_names" + echo " type: file, file must exist" + echo " example: sample_names.tsv" + echo " TSV file containing alternative sample names for renaming buttons in the" + echo " report." + echo "" + echo " --sample_filters" + echo " type: file, file must exist" + echo " example: sample_filters.tsv" + echo " TSV file containing show/hide patterns for the report" + echo "" + echo " --custom_css_file" + echo " type: file, file must exist" + echo " example: custom_style_sheet.css" + echo " Custom CSS file to add to the final report" + echo "" + echo " --profile_runtime" + echo " type: boolean_true" + echo " Add analysis of how long MultiQC takes to run to the report" + echo "" + echo "MultiQC behaviour:" + echo " --verbose" + echo " type: boolean_true" + echo " Increase output verbosity." + echo "" + echo " --quiet" + echo " type: boolean_true" + echo " Only show log warnings" + echo "" + echo " --strict" + echo " type: boolean_true" + echo " Don't catch exceptions, run additional code checks to help development." + echo "" + echo " --development" + echo " type: boolean_true" + echo " Development mode. Do not compress and minimise JS, export uncompressed" + echo " plot data." + echo "" + echo " --require_logs" + echo " type: boolean_true" + echo " Require all explicitly requested modules to have log files. If not," + echo " MultiQC will exit with an error." + echo "" + echo " --no_megaqc_upload" + echo " type: boolean_true" + echo " Don't upload generated report to MegaQC, even if MegaQC options are" + echo " found." + echo "" + echo " --no_ansi" + echo " type: boolean_true" + echo " Disable coloured log output." + echo "" + echo " --cl_config" + echo " type: string" + echo " example: qualimap_config: { general_stats_coverage: [20,40,200] }" + echo " YAML formatted string that allows to customize MultiQC behaviour like" + echo " input file detection." + echo "" + echo "Output format:" + echo " --flat" + echo " type: boolean_true" + echo " Use only flat plots (static images)." + echo "" + echo " --interactive" + echo " type: boolean_true" + echo " Use only interactive plots (in-browser Javascript)." + echo "" + echo " --data_dir" + echo " type: boolean_true" + echo " Force the parsed data directory to be created." + echo "" + echo " --no_data_dir" + echo " type: boolean_true" + echo " Prevent the parsed data directory from being created." + echo "" + echo " --zip_data_dir" + echo " type: boolean_true" + echo " Compress the data directory." + echo "" + echo " --data_format" + echo " type: string" + echo " choices: [ tsv, csv, json, yaml ]" + echo " Output parsed data in a different format than the default 'txt'." + echo "" + echo " --pdf" + echo " type: boolean_true" + echo " Creates PDF report with the 'simple' template. Requires Pandoc to be" + echo " installed." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/nanoplot/.config.vsh.yaml b/target/executable/nanoplot/.config.vsh.yaml index 1db24a0c..79b4f912 100644 --- a/target/executable/nanoplot/.config.vsh.yaml +++ b/target/executable/nanoplot/.config.vsh.yaml @@ -390,6 +390,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -491,16 +494,16 @@ build_info: engine: "docker|native" output: "target/executable/nanoplot" executable: "target/executable/nanoplot/nanoplot" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/nanoplot/nanoplot b/target/executable/nanoplot/nanoplot index a5526779..69db380f 100755 --- a/target/executable/nanoplot/nanoplot +++ b/target/executable/nanoplot/nanoplot @@ -2,7 +2,7 @@ # nanoplot main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,218 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "nanoplot main" - echo "" - echo "Run NanoPlot on nanopore-sequenced reads." - echo "NanoPlot is a plotting tool for long read sequencing data and alignments." - echo "" - echo "Inputs:" - echo " --fastq" - echo " type: file, multiple values allowed, file must exist" - echo " example: read.fq" - echo " Input fastq file(s), separated by \";\"." - echo "" - echo " --fasta" - echo " type: file, multiple values allowed, file must exist" - echo " example: read.fa" - echo " Input fasta file(s), separated by \";\"." - echo "" - echo " --fastq_rich" - echo " type: file, multiple values allowed, file must exist" - echo " example: read.fq" - echo " Input fastq file(s) generated by albacore or" - echo " MinKNOW with additional information concerning channel and time," - echo " separated by \";\"." - echo "" - echo " --fastq_minimal" - echo " type: file, multiple values allowed, file must exist" - echo " example: read.fq" - echo " Input fastq file(s) generated by albacore or MinKNOW with" - echo " additional information concerning channel and time. Minimal data is" - echo " extracted" - echo " swiftly without elaborate checks. Separated by \";\"." - echo "" - echo " --summary" - echo " type: file, multiple values allowed, file must exist" - echo " example: read.txt" - echo " Input summary file(s) generated by albacore or guppy, separated by \";\"." - echo "" - echo " --bam" - echo " type: file, multiple values allowed, file must exist" - echo " example: read.bam" - echo " Input sorted bam file(s), separated by \";\"." - echo "" - echo " --ubam" - echo " type: file, multiple values allowed, file must exist" - echo " example: read.ubam" - echo " Input unmapped bam file(s), separated by \";\"." - echo "" - echo " --cram" - echo " type: file, multiple values allowed, file must exist" - echo " example: read.cram" - echo " Input sorted cram file(s), separated by \";\"." - echo "" - echo " --pickle" - echo " type: file, multiple values allowed, file must exist" - echo " example: read.pkl" - echo " Input pickle file stored earlier, separated by \";\"." - echo "" - echo " --arrow, --feather" - echo " type: file, multiple values allowed, file must exist" - echo " example: read.arrow" - echo " Input feather file(s), separated by \";\"." - echo "" - echo "Outputs:" - echo " -o, --outdir" - echo " type: file, required parameter, output, file must exist" - echo " Specify directory in which output has to be created." - echo "" - echo "Options:" - echo " --verbose" - echo " type: boolean_true" - echo " Write log messages also to terminal" - echo "" - echo " --store" - echo " type: boolean_true" - echo " Store the extracted data in a pickle file for future plotting." - echo "" - echo " --raw" - echo " type: boolean_true" - echo " Store the extracted data in tab separated file." - echo "" - echo " --huge" - echo " type: boolean_true" - echo " Input data is one very large file." - echo "" - echo " --no_static" - echo " type: boolean_false" - echo " Do not make static (png) plots." - echo "" - echo " -p, --prefix" - echo " type: string" - echo " Specify an optional prefix to be used for the output files." - echo "" - echo " --tsv_stats" - echo " type: boolean_true" - echo " Output the stats file as a properly formatted TSV." - echo "" - echo " --only_report" - echo " type: boolean_true" - echo " Output only the report." - echo "" - echo " --info_in_report" - echo " type: boolean_true" - echo " Add NanoPlot run info in the report." - echo "" - echo "Filtering or transforming input:" - echo " --maxlength" - echo " type: integer" - echo " Drop reads longer than length specified." - echo "" - echo " --minlength" - echo " type: integer" - echo " Drop reads shorter than length specified." - echo "" - echo " --drop_outliers" - echo " type: boolean_false" - echo " Drop outlier reads with extreme long length." - echo "" - echo " --downsample" - echo " type: integer" - echo " Reduce dataset to N reads by random sampling." - echo "" - echo " --loglength" - echo " type: boolean_true" - echo " Logarithmic scaling of lengths in plots." - echo "" - echo " --percentqual" - echo " type: boolean_true" - echo " Use qualities as theoretical percent identities." - echo "" - echo " --alength" - echo " type: boolean_true" - echo " Use aligned read lengths rather than sequenced length (bam mode)." - echo "" - echo " --minqual" - echo " type: integer" - echo " Drop reads with an average quality lower than specified." - echo "" - echo " --runtime_until" - echo " type: integer" - echo " Only take the N first hours of a run." - echo "" - echo " --readtype" - echo " type: string" - echo " Which read type to extract information about from summary." - echo " Options are 1D, 2D, 1D2" - echo "" - echo " --barcoded" - echo " type: boolean_true" - echo " Use if you want to split the summary file by barcode." - echo "" - echo " --no_supplementary" - echo " type: boolean_false" - echo " Use if you want to remove supplementary alignments." - echo "" - echo "Customizing plots:" - echo " -c, --color" - echo " type: string" - echo " Specify a color for the plots, must be a valid matplotlib color." - echo "" - echo " -cm, --colormap" - echo " type: string" - echo " Specify a valid matplotlib colormap for the heatmap." - echo "" - echo " -f, --format" - echo " type: string" - echo " default: png" - echo " Specify the output format of the plots." - echo " {eps,jpeg,jpg,pdf,pgf,png,ps,raw,rgba,svg,svgz,tif,tiff}" - echo "" - echo " --plots" - echo " type: string" - echo " Specify which bivariate plots have to be made." - echo " [{kde,hex,dot} ...]" - echo "" - echo " --legacy" - echo " type: string" - echo " Specify which bivariate plots have to be made (legacy mode)." - echo " [{kde,dot,hex} ...]" - echo "" - echo " --listcolors" - echo " type: boolean_true" - echo " List the colors which are available for plotting and exit." - echo "" - echo " --listcolormaps" - echo " type: boolean_true" - echo " List the colormaps which are available for plotting and exit." - echo "" - echo " --no_N50" - echo " type: boolean_false" - echo " Hide the N50 mark in the read length histogram." - echo "" - echo " --N50" - echo " type: boolean_true" - echo " Show the N50 mark in the read length histogram." - echo "" - echo " --title" - echo " type: string" - echo " Add a title to all plots, requires quoting if using spaces." - echo "" - echo " --font_scale" - echo " type: double" - echo " Scale the font of the plots by a factor." - echo "" - echo " --dpi" - echo " type: integer" - echo " Set the dpi for saving images." - echo "" - echo " --hide_stats" - echo " type: boolean_false" - echo " Not adding Pearson R stats in some bivariate plots." -} # initialise variables VIASH_MODE='run' @@ -661,9 +449,9 @@ RUN version=$(NanoPlot --version) && \ echo "$version" > /var/software_versions.txt LABEL org.opencontainers.image.description="Companion container for running component nanoplot" -LABEL org.opencontainers.image.created="2024-12-03T10:33:56Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:37Z" LABEL org.opencontainers.image.source="https://github.com/wdecoster/NanoPlot" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -778,6 +566,244 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "nanoplot main" + echo "" + echo "Run NanoPlot on nanopore-sequenced reads." + echo "NanoPlot is a plotting tool for long read sequencing data and alignments." + echo "" + echo "Inputs:" + echo " --fastq" + echo " type: file, multiple values allowed, file must exist" + echo " example: read.fq" + echo " Input fastq file(s), separated by \";\"." + echo "" + echo " --fasta" + echo " type: file, multiple values allowed, file must exist" + echo " example: read.fa" + echo " Input fasta file(s), separated by \";\"." + echo "" + echo " --fastq_rich" + echo " type: file, multiple values allowed, file must exist" + echo " example: read.fq" + echo " Input fastq file(s) generated by albacore or" + echo " MinKNOW with additional information concerning channel and time," + echo " separated by \";\"." + echo "" + echo " --fastq_minimal" + echo " type: file, multiple values allowed, file must exist" + echo " example: read.fq" + echo " Input fastq file(s) generated by albacore or MinKNOW with" + echo " additional information concerning channel and time. Minimal data is" + echo " extracted" + echo " swiftly without elaborate checks. Separated by \";\"." + echo "" + echo " --summary" + echo " type: file, multiple values allowed, file must exist" + echo " example: read.txt" + echo " Input summary file(s) generated by albacore or guppy, separated by \";\"." + echo "" + echo " --bam" + echo " type: file, multiple values allowed, file must exist" + echo " example: read.bam" + echo " Input sorted bam file(s), separated by \";\"." + echo "" + echo " --ubam" + echo " type: file, multiple values allowed, file must exist" + echo " example: read.ubam" + echo " Input unmapped bam file(s), separated by \";\"." + echo "" + echo " --cram" + echo " type: file, multiple values allowed, file must exist" + echo " example: read.cram" + echo " Input sorted cram file(s), separated by \";\"." + echo "" + echo " --pickle" + echo " type: file, multiple values allowed, file must exist" + echo " example: read.pkl" + echo " Input pickle file stored earlier, separated by \";\"." + echo "" + echo " --arrow, --feather" + echo " type: file, multiple values allowed, file must exist" + echo " example: read.arrow" + echo " Input feather file(s), separated by \";\"." + echo "" + echo "Outputs:" + echo " -o, --outdir" + echo " type: file, required parameter, output, file must exist" + echo " Specify directory in which output has to be created." + echo "" + echo "Options:" + echo " --verbose" + echo " type: boolean_true" + echo " Write log messages also to terminal" + echo "" + echo " --store" + echo " type: boolean_true" + echo " Store the extracted data in a pickle file for future plotting." + echo "" + echo " --raw" + echo " type: boolean_true" + echo " Store the extracted data in tab separated file." + echo "" + echo " --huge" + echo " type: boolean_true" + echo " Input data is one very large file." + echo "" + echo " --no_static" + echo " type: boolean_false" + echo " Do not make static (png) plots." + echo "" + echo " -p, --prefix" + echo " type: string" + echo " Specify an optional prefix to be used for the output files." + echo "" + echo " --tsv_stats" + echo " type: boolean_true" + echo " Output the stats file as a properly formatted TSV." + echo "" + echo " --only_report" + echo " type: boolean_true" + echo " Output only the report." + echo "" + echo " --info_in_report" + echo " type: boolean_true" + echo " Add NanoPlot run info in the report." + echo "" + echo "Filtering or transforming input:" + echo " --maxlength" + echo " type: integer" + echo " Drop reads longer than length specified." + echo "" + echo " --minlength" + echo " type: integer" + echo " Drop reads shorter than length specified." + echo "" + echo " --drop_outliers" + echo " type: boolean_false" + echo " Drop outlier reads with extreme long length." + echo "" + echo " --downsample" + echo " type: integer" + echo " Reduce dataset to N reads by random sampling." + echo "" + echo " --loglength" + echo " type: boolean_true" + echo " Logarithmic scaling of lengths in plots." + echo "" + echo " --percentqual" + echo " type: boolean_true" + echo " Use qualities as theoretical percent identities." + echo "" + echo " --alength" + echo " type: boolean_true" + echo " Use aligned read lengths rather than sequenced length (bam mode)." + echo "" + echo " --minqual" + echo " type: integer" + echo " Drop reads with an average quality lower than specified." + echo "" + echo " --runtime_until" + echo " type: integer" + echo " Only take the N first hours of a run." + echo "" + echo " --readtype" + echo " type: string" + echo " Which read type to extract information about from summary." + echo " Options are 1D, 2D, 1D2" + echo "" + echo " --barcoded" + echo " type: boolean_true" + echo " Use if you want to split the summary file by barcode." + echo "" + echo " --no_supplementary" + echo " type: boolean_false" + echo " Use if you want to remove supplementary alignments." + echo "" + echo "Customizing plots:" + echo " -c, --color" + echo " type: string" + echo " Specify a color for the plots, must be a valid matplotlib color." + echo "" + echo " -cm, --colormap" + echo " type: string" + echo " Specify a valid matplotlib colormap for the heatmap." + echo "" + echo " -f, --format" + echo " type: string" + echo " default: png" + echo " Specify the output format of the plots." + echo " {eps,jpeg,jpg,pdf,pgf,png,ps,raw,rgba,svg,svgz,tif,tiff}" + echo "" + echo " --plots" + echo " type: string" + echo " Specify which bivariate plots have to be made." + echo " [{kde,hex,dot} ...]" + echo "" + echo " --legacy" + echo " type: string" + echo " Specify which bivariate plots have to be made (legacy mode)." + echo " [{kde,dot,hex} ...]" + echo "" + echo " --listcolors" + echo " type: boolean_true" + echo " List the colors which are available for plotting and exit." + echo "" + echo " --listcolormaps" + echo " type: boolean_true" + echo " List the colormaps which are available for plotting and exit." + echo "" + echo " --no_N50" + echo " type: boolean_false" + echo " Hide the N50 mark in the read length histogram." + echo "" + echo " --N50" + echo " type: boolean_true" + echo " Show the N50 mark in the read length histogram." + echo "" + echo " --title" + echo " type: string" + echo " Add a title to all plots, requires quoting if using spaces." + echo "" + echo " --font_scale" + echo " type: double" + echo " Scale the font of the plots by a factor." + echo "" + echo " --dpi" + echo " type: integer" + echo " Set the dpi for saving images." + echo "" + echo " --hide_stats" + echo " type: boolean_false" + echo " Not adding Pearson R stats in some bivariate plots." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/pear/.config.vsh.yaml b/target/executable/pear/.config.vsh.yaml index ddfa2de3..8194853a 100644 --- a/target/executable/pear/.config.vsh.yaml +++ b/target/executable/pear/.config.vsh.yaml @@ -295,6 +295,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -397,16 +400,16 @@ build_info: engine: "docker|native" output: "target/executable/pear" executable: "target/executable/pear/pear" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/pear/pear b/target/executable/pear/pear index 15ec8b34..0efc9bed 100755 --- a/target/executable/pear/pear +++ b/target/executable/pear/pear @@ -2,7 +2,7 @@ # pear main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,150 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "pear main" - echo "" - echo "PEAR is an ultrafast, memory-efficient and highly accurate pair-end read merger." - echo "It is fully parallelized and can run with as low as just a few kilobytes of" - echo "memory." - echo "" - echo "PEAR evaluates all possible paired-end read overlaps and without requiring the" - echo "target fragment size as input. In addition, it implements a statistical test for" - echo "minimizing false-positive results. Together with a highly optimized" - echo "implementation, it can merge millions of paired end reads within a couple of" - echo "minutes on a standard desktop computer." - echo "" - echo "Inputs:" - echo " -f, --forward_fastq" - echo " type: file, required parameter, file must exist" - echo " example: forward.fastq" - echo " Forward paired-end FASTQ file" - echo "" - echo " -r, --reverse_fastq" - echo " type: file, required parameter, file must exist" - echo " example: reverse.fastq" - echo " Reverse paired-end FASTQ file" - echo "" - echo "Outputs:" - echo " --assembled" - echo " type: file, required parameter, output, file must exist" - echo " The output file containing assembled reads. Can be compressed with gzip." - echo "" - echo " --unassembled_forward" - echo " type: file, required parameter, output, file must exist" - echo " The output file containing forward reads that could not be assembled." - echo " Can be compressed with gzip." - echo "" - echo " --unassembled_reverse" - echo " type: file, required parameter, output, file must exist" - echo " The output file containing reverse reads that could not be assembled." - echo " Can be compressed with gzip." - echo "" - echo " --discarded" - echo " type: file, required parameter, output, file must exist" - echo " The output file containing reads that were discarded due to too low" - echo " quality or too many uncalled bases. Can be compressed with gzip." - echo "" - echo "Arguments:" - echo " -p, --p_value" - echo " type: double" - echo " example: 0.01" - echo " Specify a p-value for the statistical test. If the computed p-value of a" - echo " possible assembly exceeds the specified p-value then paired-end read" - echo " will not be assembled. Valid options are: 0.0001, 0.001, 0.01, 0.05 and" - echo " 1.0. Setting 1.0 disables the test." - echo "" - echo " -v, --min_overlap" - echo " type: integer" - echo " example: 10" - echo " Specify the minimum overlap size. The minimum overlap may be set to 1" - echo " when the statistical test is used. However, further restricting the" - echo " minimum overlap size to a proper value may reduce false-positive" - echo " assembles." - echo "" - echo " -m, --max_assembly_length" - echo " type: integer" - echo " example: 0" - echo " Specify the maximum possible length of the assembled sequences. Setting" - echo " this value to 0 disables the restriction and assembled sequences may be" - echo " arbitrary long." - echo "" - echo " -n, --min_assembly_length" - echo " type: integer" - echo " example: 0" - echo " Specify the minimum possible length of the assembled sequences. Setting" - echo " this value to 0 disables the restriction and assembled sequences may be" - echo " arbitrary short." - echo "" - echo " -t, --min_trim_length" - echo " type: integer" - echo " example: 1" - echo " Specify the minimum length of reads after trimming the low quality" - echo " part (see option -q)" - echo "" - echo " -q, --quality_threshold" - echo " type: integer" - echo " example: 0" - echo " Specify the quality threshold for trimming the low quality part of a" - echo " read. If the quality scores of two consecutive bases are strictly less" - echo " than the specified threshold, the rest of the read will be trimmed." - echo "" - echo " -u, --max_uncalled_base" - echo " type: double" - echo " example: 1.0" - echo " Specify the maximal proportion of uncalled bases in a read. Setting this" - echo " value to 0 will cause PEAR to discard all reads containing uncalled" - echo " bases. The other extreme setting is 1 which causes PEAR to process all" - echo " reads independent on the number of uncalled bases." - echo "" - echo " -g, --test_method" - echo " type: integer" - echo " example: 1" - echo " Specify the type of statistical test. Two options are available. 1:" - echo " Given the minimum allowed overlap, test using the highest OES. Note that" - echo " due to its discrete nature, this test usually yields a lower p-value for" - echo " the assembled read than the cut- off (specified by -p). For example," - echo " setting the cut-off to 0.05 using this test, the assembled reads might" - echo " have an actual p-value of 0.02." - echo " 2. Use the acceptance probability (m.a.p). This test methods computes" - echo " the same probability as test method 1. However, it assumes that the" - echo " minimal overlap is the observed overlap with the highest OES, instead of" - echo " the one specified by -v. Therefore, this is not a valid statistical test" - echo " and the 'p-value' is in fact the maximal probability for accepting the" - echo " assembly. Nevertheless, we observed in practice that for the case the" - echo " actual overlap sizes are relatively small, test 2 can correctly assemble" - echo " more reads with only slightly higher false-positive rate." - echo "" - echo " -e, --emperical_freqs" - echo " type: boolean_true" - echo " Disable empirical base frequencies." - echo "" - echo " -s, --score_method" - echo " type: integer" - echo " example: 2" - echo " Specify the scoring method. 1. OES with +1 for match and -1 for" - echo " mismatch. 2: Assembly score (AS). Use +1 for match and -1 for mismatch" - echo " multiplied by base quality scores. 3: Ignore quality scores and use +1" - echo " for a match and -1 for a mismatch." - echo "" - echo " -b, --phred_base" - echo " type: integer" - echo " example: 33" - echo " Base PHRED quality score." - echo "" - echo " -c, --cap" - echo " type: integer" - echo " example: 40" - echo " Specify the upper bound for the resulting quality score. If set to" - echo " zero, capping is disabled." - echo "" - echo " -z, --nbase" - echo " type: boolean_true" - echo " When merging a base-pair that consists of two non-equal bases out of" - echo " which none is degenerate, set the merged base to N and use the highest" - echo " quality score of the two bases" -} # initialise variables VIASH_MODE='run' @@ -597,9 +453,9 @@ echo "pear: $version" > /var/software_versions.txt LABEL org.opencontainers.image.authors="Kai Waldrant" LABEL org.opencontainers.image.description="Companion container for running component pear" -LABEL org.opencontainers.image.created="2024-12-03T10:33:59Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:36Z" LABEL org.opencontainers.image.source="https://github.com/tseemann/PEAR" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -714,6 +570,176 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "pear main" + echo "" + echo "PEAR is an ultrafast, memory-efficient and highly accurate pair-end read merger." + echo "It is fully parallelized and can run with as low as just a few kilobytes of" + echo "memory." + echo "" + echo "PEAR evaluates all possible paired-end read overlaps and without requiring the" + echo "target fragment size as input. In addition, it implements a statistical test for" + echo "minimizing false-positive results. Together with a highly optimized" + echo "implementation, it can merge millions of paired end reads within a couple of" + echo "minutes on a standard desktop computer." + echo "" + echo "Inputs:" + echo " -f, --forward_fastq" + echo " type: file, required parameter, file must exist" + echo " example: forward.fastq" + echo " Forward paired-end FASTQ file" + echo "" + echo " -r, --reverse_fastq" + echo " type: file, required parameter, file must exist" + echo " example: reverse.fastq" + echo " Reverse paired-end FASTQ file" + echo "" + echo "Outputs:" + echo " --assembled" + echo " type: file, required parameter, output, file must exist" + echo " The output file containing assembled reads. Can be compressed with gzip." + echo "" + echo " --unassembled_forward" + echo " type: file, required parameter, output, file must exist" + echo " The output file containing forward reads that could not be assembled." + echo " Can be compressed with gzip." + echo "" + echo " --unassembled_reverse" + echo " type: file, required parameter, output, file must exist" + echo " The output file containing reverse reads that could not be assembled." + echo " Can be compressed with gzip." + echo "" + echo " --discarded" + echo " type: file, required parameter, output, file must exist" + echo " The output file containing reads that were discarded due to too low" + echo " quality or too many uncalled bases. Can be compressed with gzip." + echo "" + echo "Arguments:" + echo " -p, --p_value" + echo " type: double" + echo " example: 0.01" + echo " Specify a p-value for the statistical test. If the computed p-value of a" + echo " possible assembly exceeds the specified p-value then paired-end read" + echo " will not be assembled. Valid options are: 0.0001, 0.001, 0.01, 0.05 and" + echo " 1.0. Setting 1.0 disables the test." + echo "" + echo " -v, --min_overlap" + echo " type: integer" + echo " example: 10" + echo " Specify the minimum overlap size. The minimum overlap may be set to 1" + echo " when the statistical test is used. However, further restricting the" + echo " minimum overlap size to a proper value may reduce false-positive" + echo " assembles." + echo "" + echo " -m, --max_assembly_length" + echo " type: integer" + echo " example: 0" + echo " Specify the maximum possible length of the assembled sequences. Setting" + echo " this value to 0 disables the restriction and assembled sequences may be" + echo " arbitrary long." + echo "" + echo " -n, --min_assembly_length" + echo " type: integer" + echo " example: 0" + echo " Specify the minimum possible length of the assembled sequences. Setting" + echo " this value to 0 disables the restriction and assembled sequences may be" + echo " arbitrary short." + echo "" + echo " -t, --min_trim_length" + echo " type: integer" + echo " example: 1" + echo " Specify the minimum length of reads after trimming the low quality" + echo " part (see option -q)" + echo "" + echo " -q, --quality_threshold" + echo " type: integer" + echo " example: 0" + echo " Specify the quality threshold for trimming the low quality part of a" + echo " read. If the quality scores of two consecutive bases are strictly less" + echo " than the specified threshold, the rest of the read will be trimmed." + echo "" + echo " -u, --max_uncalled_base" + echo " type: double" + echo " example: 1.0" + echo " Specify the maximal proportion of uncalled bases in a read. Setting this" + echo " value to 0 will cause PEAR to discard all reads containing uncalled" + echo " bases. The other extreme setting is 1 which causes PEAR to process all" + echo " reads independent on the number of uncalled bases." + echo "" + echo " -g, --test_method" + echo " type: integer" + echo " example: 1" + echo " Specify the type of statistical test. Two options are available. 1:" + echo " Given the minimum allowed overlap, test using the highest OES. Note that" + echo " due to its discrete nature, this test usually yields a lower p-value for" + echo " the assembled read than the cut- off (specified by -p). For example," + echo " setting the cut-off to 0.05 using this test, the assembled reads might" + echo " have an actual p-value of 0.02." + echo " 2. Use the acceptance probability (m.a.p). This test methods computes" + echo " the same probability as test method 1. However, it assumes that the" + echo " minimal overlap is the observed overlap with the highest OES, instead of" + echo " the one specified by -v. Therefore, this is not a valid statistical test" + echo " and the 'p-value' is in fact the maximal probability for accepting the" + echo " assembly. Nevertheless, we observed in practice that for the case the" + echo " actual overlap sizes are relatively small, test 2 can correctly assemble" + echo " more reads with only slightly higher false-positive rate." + echo "" + echo " -e, --emperical_freqs" + echo " type: boolean_true" + echo " Disable empirical base frequencies." + echo "" + echo " -s, --score_method" + echo " type: integer" + echo " example: 2" + echo " Specify the scoring method. 1. OES with +1 for match and -1 for" + echo " mismatch. 2: Assembly score (AS). Use +1 for match and -1 for mismatch" + echo " multiplied by base quality scores. 3: Ignore quality scores and use +1" + echo " for a match and -1 for a mismatch." + echo "" + echo " -b, --phred_base" + echo " type: integer" + echo " example: 33" + echo " Base PHRED quality score." + echo "" + echo " -c, --cap" + echo " type: integer" + echo " example: 40" + echo " Specify the upper bound for the resulting quality score. If set to" + echo " zero, capping is disabled." + echo "" + echo " -z, --nbase" + echo " type: boolean_true" + echo " When merging a base-pair that consists of two non-equal bases out of" + echo " which none is degenerate, set the merged base to N and use the highest" + echo " quality score of the two bases" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/qualimap/qualimap_rnaseq/.config.vsh.yaml b/target/executable/qualimap/qualimap_rnaseq/.config.vsh.yaml index 5e9ed110..d97d6cae 100644 --- a/target/executable/qualimap/qualimap_rnaseq/.config.vsh.yaml +++ b/target/executable/qualimap/qualimap_rnaseq/.config.vsh.yaml @@ -161,6 +161,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -263,16 +266,16 @@ build_info: engine: "docker|native" output: "target/executable/qualimap/qualimap_rnaseq" executable: "target/executable/qualimap/qualimap_rnaseq/qualimap_rnaseq" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/qualimap/qualimap_rnaseq/qualimap_rnaseq b/target/executable/qualimap/qualimap_rnaseq/qualimap_rnaseq index fbe92281..ac1eca89 100755 --- a/target/executable/qualimap/qualimap_rnaseq/qualimap_rnaseq +++ b/target/executable/qualimap/qualimap_rnaseq/qualimap_rnaseq @@ -2,7 +2,7 @@ # qualimap_rnaseq main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,81 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "qualimap_rnaseq main" - echo "" - echo "Qualimap RNA-seq QC reports quality control metrics and bias estimations" - echo "which are specific for whole transcriptome sequencing, including reads genomic" - echo "origin, junction analysis, transcript coverage and 5’-3’ bias computation." - echo "" - echo "Input:" - echo " --bam" - echo " type: file, required parameter, file must exist" - echo " example: alignment.bam" - echo " Path to the sequence alignment file in BAM format, produced by a" - echo " splicing-aware aligner." - echo "" - echo " --gtf" - echo " type: file, required parameter, file must exist" - echo " example: annotations.gtf" - echo " Path to genomic annotations in Ensembl GTF format." - echo "" - echo "Output:" - echo " --qc_results" - echo " type: file, required parameter, output, file must exist" - echo " example: rnaseq_qc_results.txt" - echo " Text file containing the RNAseq QC results." - echo "" - echo " --counts" - echo " type: file, output, file must exist" - echo " Output file for computed counts." - echo "" - echo " --report" - echo " type: file, output, file must exist" - echo " example: report.html" - echo " Report output file. Supported formats are PDF or HTML." - echo "" - echo "Optional:" - echo " --num_pr_bases" - echo " type: integer" - echo " min: 1" - echo " Number of upstream/downstream nucleotide bases to compute 5'-3' bias" - echo " (default = 100)." - echo "" - echo " --num_tr_bias" - echo " type: integer" - echo " min: 1" - echo " Number of top highly expressed transcripts to compute 5'-3' bias" - echo " (default = 1000)." - echo "" - echo " --algorithm" - echo " type: string" - echo " choices: [ uniquely-mapped-reads, proportional ]" - echo " Counting algorithm (uniquely-mapped-reads (default) or proportional)." - echo "" - echo " --sequencing_protocol" - echo " type: string" - echo " choices: [ non-strand-specific, strand-specific-reverse," - echo "strand-specific-forward ]" - echo " Sequencing library protocol (strand-specific-forward," - echo " strand-specific-reverse or non-strand-specific (default))." - echo "" - echo " --paired" - echo " type: boolean_true" - echo " Setting this flag for paired-end experiments will result in counting" - echo " fragments instead of reads." - echo "" - echo " --sorted" - echo " type: boolean_true" - echo " Setting this flag indicates that the input file is already sorted by" - echo " name. If flag is not set, additional sorting by name will be performed." - echo " Only requiredfor paired-end analysis." - echo "" - echo " --java_memory_size" - echo " type: string" - echo " maximum Java heap memory size, default = 4G." -} # initialise variables VIASH_MODE='run' @@ -527,9 +452,9 @@ RUN echo QualiMap: $(qualimap 2>&1 | grep QualiMap | sed 's/^.*QualiMap//') > /v LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component qualimap qualimap_rnaseq" -LABEL org.opencontainers.image.created="2024-12-03T10:33:53Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:45Z" LABEL org.opencontainers.image.source="https://bitbucket.org/kokonech/qualimap/commits/branch/master" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -644,6 +569,107 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "qualimap_rnaseq main" + echo "" + echo "Qualimap RNA-seq QC reports quality control metrics and bias estimations" + echo "which are specific for whole transcriptome sequencing, including reads genomic" + echo "origin, junction analysis, transcript coverage and 5’-3’ bias computation." + echo "" + echo "Input:" + echo " --bam" + echo " type: file, required parameter, file must exist" + echo " example: alignment.bam" + echo " Path to the sequence alignment file in BAM format, produced by a" + echo " splicing-aware aligner." + echo "" + echo " --gtf" + echo " type: file, required parameter, file must exist" + echo " example: annotations.gtf" + echo " Path to genomic annotations in Ensembl GTF format." + echo "" + echo "Output:" + echo " --qc_results" + echo " type: file, required parameter, output, file must exist" + echo " example: rnaseq_qc_results.txt" + echo " Text file containing the RNAseq QC results." + echo "" + echo " --counts" + echo " type: file, output, file must exist" + echo " Output file for computed counts." + echo "" + echo " --report" + echo " type: file, output, file must exist" + echo " example: report.html" + echo " Report output file. Supported formats are PDF or HTML." + echo "" + echo "Optional:" + echo " --num_pr_bases" + echo " type: integer" + echo " min: 1" + echo " Number of upstream/downstream nucleotide bases to compute 5'-3' bias" + echo " (default = 100)." + echo "" + echo " --num_tr_bias" + echo " type: integer" + echo " min: 1" + echo " Number of top highly expressed transcripts to compute 5'-3' bias" + echo " (default = 1000)." + echo "" + echo " --algorithm" + echo " type: string" + echo " choices: [ uniquely-mapped-reads, proportional ]" + echo " Counting algorithm (uniquely-mapped-reads (default) or proportional)." + echo "" + echo " --sequencing_protocol" + echo " type: string" + echo " choices: [ non-strand-specific, strand-specific-reverse," + echo "strand-specific-forward ]" + echo " Sequencing library protocol (strand-specific-forward," + echo " strand-specific-reverse or non-strand-specific (default))." + echo "" + echo " --paired" + echo " type: boolean_true" + echo " Setting this flag for paired-end experiments will result in counting" + echo " fragments instead of reads." + echo "" + echo " --sorted" + echo " type: boolean_true" + echo " Setting this flag indicates that the input file is already sorted by" + echo " name. If flag is not set, additional sorting by name will be performed." + echo " Only requiredfor paired-end analysis." + echo "" + echo " --java_memory_size" + echo " type: string" + echo " maximum Java heap memory size, default = 4G." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/rsem/rsem_calculate_expression/.config.vsh.yaml b/target/executable/rsem/rsem_calculate_expression/.config.vsh.yaml index 6151faca..c1edd507 100644 --- a/target/executable/rsem/rsem_calculate_expression/.config.vsh.yaml +++ b/target/executable/rsem/rsem_calculate_expression/.config.vsh.yaml @@ -724,6 +724,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -851,16 +854,16 @@ build_info: engine: "docker|native" output: "target/executable/rsem/rsem_calculate_expression" executable: "target/executable/rsem/rsem_calculate_expression/rsem_calculate_expression" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/rsem/rsem_calculate_expression/rsem_calculate_expression b/target/executable/rsem/rsem_calculate_expression/rsem_calculate_expression index e60508ae..4ad46d89 100755 --- a/target/executable/rsem/rsem_calculate_expression/rsem_calculate_expression +++ b/target/executable/rsem/rsem_calculate_expression/rsem_calculate_expression @@ -2,7 +2,7 @@ # rsem_calculate_expression main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,6 +169,425 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential gcc g++ make wget zlib1g-dev unzip && \ + rm -rf /var/lib/apt/lists/* + +ENV STAR_VERSION=2.7.11b +ENV RSEM_VERSION=1.3.3 +RUN apt-get update && \ +apt-get clean && \ +wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/2.7.11a.zip && \ +unzip 2.7.11a.zip && \ +cp STAR-2.7.11a/bin/Linux_x86_64_static/STAR /usr/local/bin && \ +cd && \ +wget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v1.3.3.zip && \ +unzip v1.3.3.zip && \ +cd RSEM-1.3.3 && \ +make && \ +make install + +RUN echo "RSEM: `rsem-calculate-expression --version | sed -e 's/Current version: RSEM v//g'`" > /var/software_versions.txt && \ +echo "STAR: `STAR --version`" >> /var/software_versions.txt && \ +echo "bowtie2: `bowtie2 --version | grep -oP '\d+\.\d+\.\d+'`" >> /var/software_versions.txt && \ +echo "bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \K\d+\.\d+\.\d+'`" >> /var/software_versions.txt && \ +echo "HISAT2: `hisat2 --version | grep -oP 'hisat2-align-s version \K\d+\.\d+\.\d+'`" >> /var/software_versions.txt + +LABEL org.opencontainers.image.description="Companion container for running component rsem rsem_calculate_expression" +LABEL org.opencontainers.image.created="2025-03-06T09:19:41Z" +LABEL org.opencontainers.image.source="https://github.com/deweylab/RSEM" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "rsem_calculate_expression main" @@ -689,425 +1108,32 @@ function ViashHelp { echo " Parameters for all the above models are learned from a training set. For" echo " detailed explanations, please" echo " see prior-enhanced RSEM's paper. (Default: 'pk')" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? : whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 -ENTRYPOINT [] -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential gcc g++ make wget zlib1g-dev unzip && \ - rm -rf /var/lib/apt/lists/* - -ENV STAR_VERSION=2.7.11b -ENV RSEM_VERSION=1.3.3 -RUN apt-get update && \ -apt-get clean && \ -wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/2.7.11a.zip && \ -unzip 2.7.11a.zip && \ -cp STAR-2.7.11a/bin/Linux_x86_64_static/STAR /usr/local/bin && \ -cd && \ -wget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v1.3.3.zip && \ -unzip v1.3.3.zip && \ -cd RSEM-1.3.3 && \ -make && \ -make install - -RUN echo "RSEM: `rsem-calculate-expression --version | sed -e 's/Current version: RSEM v//g'`" > /var/software_versions.txt && \ -echo "STAR: `STAR --version`" >> /var/software_versions.txt && \ -echo "bowtie2: `bowtie2 --version | grep -oP '\d+\.\d+\.\d+'`" >> /var/software_versions.txt && \ -echo "bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \K\d+\.\d+\.\d+'`" >> /var/software_versions.txt && \ -echo "HISAT2: `hisat2 --version | grep -oP 'hisat2-align-s version \K\d+\.\d+\.\d+'`" >> /var/software_versions.txt - -LABEL org.opencontainers.image.description="Companion container for running component rsem rsem_calculate_expression" -LABEL org.opencontainers.image.created="2024-12-03T10:33:52Z" -LABEL org.opencontainers.image.source="https://github.com/deweylab/RSEM" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/rsem/rsem_prepare_reference/.config.vsh.yaml b/target/executable/rsem/rsem_prepare_reference/.config.vsh.yaml index 6a4e3891..a96f3460 100644 --- a/target/executable/rsem/rsem_prepare_reference/.config.vsh.yaml +++ b/target/executable/rsem/rsem_prepare_reference/.config.vsh.yaml @@ -268,6 +268,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -415,16 +418,16 @@ build_info: engine: "docker|native" output: "target/executable/rsem/rsem_prepare_reference" executable: "target/executable/rsem/rsem_prepare_reference/rsem_prepare_reference" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/rsem/rsem_prepare_reference/rsem_prepare_reference b/target/executable/rsem/rsem_prepare_reference/rsem_prepare_reference index d54f8faa..44a05792 100755 --- a/target/executable/rsem/rsem_prepare_reference/rsem_prepare_reference +++ b/target/executable/rsem/rsem_prepare_reference/rsem_prepare_reference @@ -2,7 +2,7 @@ # rsem_prepare_reference main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,168 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "rsem_prepare_reference main" - echo "" - echo "RSEM is a software package for estimating gene and isoform expression levels" - echo "from RNA-Seq data. This component prepares transcript references for RSEM." - echo "" - echo "Inputs:" - echo " --reference_fasta_files" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: read1.fasta" - echo " Semi-colon separated list of Multi-FASTA formatted files OR a directory" - echo " name. If a directory name is specified, RSEM will read all files with" - echo " suffix \".fa\" or \".fasta\" in this directory. The files should contain" - echo " either the sequences of transcripts or an entire genome, depending on" - echo " whether the '--gtf' option is used." - echo "" - echo " --reference_name" - echo " type: string, required parameter" - echo " example: /ref/mm9" - echo " The name of the reference used. RSEM will generate several" - echo " reference-related files that are prefixed by this name. This name can" - echo " contain path information (e.g. '/ref/mm9')." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " Directory containing reference files generated by RSEM." - echo "" - echo "Other options:" - echo " --gtf" - echo " type: file, file must exist" - echo " example: annotations.gtf" - echo " Assume that 'reference_fasta_files' contains the sequence of a genome," - echo " and extract transcript reference sequences using the gene annotations" - echo " specified in the GTF file. If this and '--gff3' options are not" - echo " provided, RSEM will assume 'reference_fasta_files' contains the" - echo " reference transcripts. In this case, RSEM assumes that name of each" - echo " sequence in the Multi-FASTA files is its transcript_id." - echo "" - echo " --gff3" - echo " type: file, file must exist" - echo " example: annotations.gff" - echo " GFF3 annotation file. Converted to GTF format with the file name" - echo " 'reference_name.gtf'. Please make sure that 'reference_name.gtf' does" - echo " not exist." - echo "" - echo " --gff3_rna_patterns" - echo " type: string, multiple values allowed" - echo " example: mRNA;rRNA" - echo " List of transcript categories (separated by semi-colon). Only" - echo " transcripts that match the string will be extracted." - echo "" - echo " --gff3_genes_as_transcripts" - echo " type: boolean_true" - echo " This option is designed for untypical organisms, such as viruses, whose" - echo " GFF3 files only contain genes. RSEM will assume each gene as a unique" - echo " transcript when it converts the GFF3 file into GTF format." - echo "" - echo " --trusted_sources" - echo " type: string, multiple values allowed" - echo " example: ENSEMBL;HAVANA" - echo " List of trusted sources (separated by semi-colon). Only transcripts" - echo " coming from these sources will be extracted. If this option is off, all" - echo " sources are accepted." - echo "" - echo " --transcript_to_gene_map" - echo " type: file, file must exist" - echo " example: isoforms.txt" - echo " Use information from this file to map from transcript (isoform) ids to" - echo " gene ids. Each line of this file should be of the form:" - echo " gene_id transcript_id" - echo " with the two fields separated by a tab character." - echo " If you are using a GTF file for the \"UCSC Genes\" gene set from the UCSC" - echo " Genome Browser, then the \"knownIsoforms.txt\" file (obtained from the" - echo " \"Downloads\" section of the UCSC Genome Browser site) is of this format." - echo " If this option is off, then the mapping of isoforms to genes depends on" - echo " whether the '--gtf' option is specified. If '--gtf' is specified, then" - echo " RSEM uses the \"gene_id\" and \"transcript_id\" attributes in the GTF file." - echo " Otherwise, RSEM assumes that each sequence in the reference sequence" - echo " files is a separate gene." - echo "" - echo " --allele_to_gene_map" - echo " type: file, file must exist" - echo " Use information from to provide gene_id and transcript_id" - echo " information for each allele-specific transcript. Each line of " - echo " should be of the form:" - echo " gene_id transcript_id allele_id" - echo " with the fields separated by a tab character." - echo " This option is designed for quantifying allele-specific expression. It" - echo " is only valid if '--gtf' option is not specified. allele_id should be" - echo " the sequence names presented in the Multi-FASTA-formatted files." - echo "" - echo " --polyA" - echo " type: boolean_true" - echo " Add poly(A) tails to the end of all reference isoforms. The length of" - echo " poly(A) tail added is specified by '--polyA-length' option. STAR aligner" - echo " users may not want to use this option." - echo "" - echo " --polyA_length" - echo " type: integer" - echo " example: 125" - echo " The length of the poly(A) tails to be added." - echo "" - echo " --no_polyA_subset" - echo " type: file, file must exist" - echo " example: transcript_ids.txt" - echo " Only meaningful if '--polyA' is specified. Do not add poly(A) tails to" - echo " those transcripts listed in this file containing a list of" - echo " transcript_ids." - echo "" - echo " --bowtie" - echo " type: boolean_true" - echo " Build Bowtie indices." - echo "" - echo " --bowtie2" - echo " type: boolean_true" - echo " Build Bowtie 2 indices." - echo "" - echo " --star" - echo " type: boolean_true" - echo " Build STAR indices." - echo "" - echo " --star_sjdboverhang" - echo " type: integer" - echo " example: 100" - echo " Length of the genomic sequence around annotated junction. It is only" - echo " used for STAR to build splice junctions database and not needed for" - echo " Bowtie or Bowtie2. It will be passed as the --sjdbOverhang option to" - echo " STAR. According to STAR's manual, its ideal value is max(ReadLength)-1," - echo " e.g. for 2x101 paired-end reads, the ideal value is 101-1=100. In most" - echo " cases, the default value of 100 will work as well as the ideal value." - echo " (Default is 100)" - echo "" - echo " --hisat2_hca" - echo " type: boolean_true" - echo " Build HISAT2 indices on the transcriptome according to Human Cell Atlas" - echo " (HCA) SMART-Seq2 pipeline." - echo "" - echo " -q, --quiet" - echo " type: boolean_true" - echo " Suppress the output of logging information." - echo "" - echo "Prior-enhanced RSEM options:" - echo " --prep_pRSEM" - echo " type: boolean_true" - echo " A Boolean indicating whether to prepare reference files for pRSEM," - echo " including building Bowtie indices for a genome and selecting training" - echo " set isoforms. The index files will be used for aligning ChIP-seq reads" - echo " in prior-enhanced RSEM and the training set isoforms will be used for" - echo " learning prior. A path to Bowtie executables and a mappability file in" - echo " bigWig format are required when this option is on. Currently, Bowtie2 is" - echo " not supported for prior-enhanced RSEM." - echo "" - echo " --mappability_bigwig_file" - echo " type: file, file must exist" - echo " Full path to a whole-genome mappability file in bigWig format. This file" - echo " is required for running prior-enhanced RSEM. It is used for selecting a" - echo " training set of isoforms for prior-learning. This file can be either" - echo " downloaded from UCSC Genome Browser or generated by GEM (Derrien et al.," - echo " 2012, PLoS One)." -} # initialise variables VIASH_MODE='run' @@ -656,9 +494,9 @@ echo "HISAT2: `hisat2 --version | grep -oP 'hisat2-align-s version \K\d+\.\d+\.\ LABEL org.opencontainers.image.authors="Sai Nirmayi Yasa" LABEL org.opencontainers.image.description="Companion container for running component rsem rsem_prepare_reference" -LABEL org.opencontainers.image.created="2024-12-03T10:33:52Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:41Z" LABEL org.opencontainers.image.source="https://github.com/deweylab/RSEM" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -773,6 +611,194 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rsem_prepare_reference main" + echo "" + echo "RSEM is a software package for estimating gene and isoform expression levels" + echo "from RNA-Seq data. This component prepares transcript references for RSEM." + echo "" + echo "Inputs:" + echo " --reference_fasta_files" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example: read1.fasta" + echo " Semi-colon separated list of Multi-FASTA formatted files OR a directory" + echo " name. If a directory name is specified, RSEM will read all files with" + echo " suffix \".fa\" or \".fasta\" in this directory. The files should contain" + echo " either the sequences of transcripts or an entire genome, depending on" + echo " whether the '--gtf' option is used." + echo "" + echo " --reference_name" + echo " type: string, required parameter" + echo " example: /ref/mm9" + echo " The name of the reference used. RSEM will generate several" + echo " reference-related files that are prefixed by this name. This name can" + echo " contain path information (e.g. '/ref/mm9')." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " Directory containing reference files generated by RSEM." + echo "" + echo "Other options:" + echo " --gtf" + echo " type: file, file must exist" + echo " example: annotations.gtf" + echo " Assume that 'reference_fasta_files' contains the sequence of a genome," + echo " and extract transcript reference sequences using the gene annotations" + echo " specified in the GTF file. If this and '--gff3' options are not" + echo " provided, RSEM will assume 'reference_fasta_files' contains the" + echo " reference transcripts. In this case, RSEM assumes that name of each" + echo " sequence in the Multi-FASTA files is its transcript_id." + echo "" + echo " --gff3" + echo " type: file, file must exist" + echo " example: annotations.gff" + echo " GFF3 annotation file. Converted to GTF format with the file name" + echo " 'reference_name.gtf'. Please make sure that 'reference_name.gtf' does" + echo " not exist." + echo "" + echo " --gff3_rna_patterns" + echo " type: string, multiple values allowed" + echo " example: mRNA;rRNA" + echo " List of transcript categories (separated by semi-colon). Only" + echo " transcripts that match the string will be extracted." + echo "" + echo " --gff3_genes_as_transcripts" + echo " type: boolean_true" + echo " This option is designed for untypical organisms, such as viruses, whose" + echo " GFF3 files only contain genes. RSEM will assume each gene as a unique" + echo " transcript when it converts the GFF3 file into GTF format." + echo "" + echo " --trusted_sources" + echo " type: string, multiple values allowed" + echo " example: ENSEMBL;HAVANA" + echo " List of trusted sources (separated by semi-colon). Only transcripts" + echo " coming from these sources will be extracted. If this option is off, all" + echo " sources are accepted." + echo "" + echo " --transcript_to_gene_map" + echo " type: file, file must exist" + echo " example: isoforms.txt" + echo " Use information from this file to map from transcript (isoform) ids to" + echo " gene ids. Each line of this file should be of the form:" + echo " gene_id transcript_id" + echo " with the two fields separated by a tab character." + echo " If you are using a GTF file for the \"UCSC Genes\" gene set from the UCSC" + echo " Genome Browser, then the \"knownIsoforms.txt\" file (obtained from the" + echo " \"Downloads\" section of the UCSC Genome Browser site) is of this format." + echo " If this option is off, then the mapping of isoforms to genes depends on" + echo " whether the '--gtf' option is specified. If '--gtf' is specified, then" + echo " RSEM uses the \"gene_id\" and \"transcript_id\" attributes in the GTF file." + echo " Otherwise, RSEM assumes that each sequence in the reference sequence" + echo " files is a separate gene." + echo "" + echo " --allele_to_gene_map" + echo " type: file, file must exist" + echo " Use information from to provide gene_id and transcript_id" + echo " information for each allele-specific transcript. Each line of " + echo " should be of the form:" + echo " gene_id transcript_id allele_id" + echo " with the fields separated by a tab character." + echo " This option is designed for quantifying allele-specific expression. It" + echo " is only valid if '--gtf' option is not specified. allele_id should be" + echo " the sequence names presented in the Multi-FASTA-formatted files." + echo "" + echo " --polyA" + echo " type: boolean_true" + echo " Add poly(A) tails to the end of all reference isoforms. The length of" + echo " poly(A) tail added is specified by '--polyA-length' option. STAR aligner" + echo " users may not want to use this option." + echo "" + echo " --polyA_length" + echo " type: integer" + echo " example: 125" + echo " The length of the poly(A) tails to be added." + echo "" + echo " --no_polyA_subset" + echo " type: file, file must exist" + echo " example: transcript_ids.txt" + echo " Only meaningful if '--polyA' is specified. Do not add poly(A) tails to" + echo " those transcripts listed in this file containing a list of" + echo " transcript_ids." + echo "" + echo " --bowtie" + echo " type: boolean_true" + echo " Build Bowtie indices." + echo "" + echo " --bowtie2" + echo " type: boolean_true" + echo " Build Bowtie 2 indices." + echo "" + echo " --star" + echo " type: boolean_true" + echo " Build STAR indices." + echo "" + echo " --star_sjdboverhang" + echo " type: integer" + echo " example: 100" + echo " Length of the genomic sequence around annotated junction. It is only" + echo " used for STAR to build splice junctions database and not needed for" + echo " Bowtie or Bowtie2. It will be passed as the --sjdbOverhang option to" + echo " STAR. According to STAR's manual, its ideal value is max(ReadLength)-1," + echo " e.g. for 2x101 paired-end reads, the ideal value is 101-1=100. In most" + echo " cases, the default value of 100 will work as well as the ideal value." + echo " (Default is 100)" + echo "" + echo " --hisat2_hca" + echo " type: boolean_true" + echo " Build HISAT2 indices on the transcriptome according to Human Cell Atlas" + echo " (HCA) SMART-Seq2 pipeline." + echo "" + echo " -q, --quiet" + echo " type: boolean_true" + echo " Suppress the output of logging information." + echo "" + echo "Prior-enhanced RSEM options:" + echo " --prep_pRSEM" + echo " type: boolean_true" + echo " A Boolean indicating whether to prepare reference files for pRSEM," + echo " including building Bowtie indices for a genome and selecting training" + echo " set isoforms. The index files will be used for aligning ChIP-seq reads" + echo " in prior-enhanced RSEM and the training set isoforms will be used for" + echo " learning prior. A path to Bowtie executables and a mappability file in" + echo " bigWig format are required when this option is on. Currently, Bowtie2 is" + echo " not supported for prior-enhanced RSEM." + echo "" + echo " --mappability_bigwig_file" + echo " type: file, file must exist" + echo " Full path to a whole-genome mappability file in bigWig format. This file" + echo " is required for running prior-enhanced RSEM. It is used for selecting a" + echo " training set of isoforms for prior-learning. This file can be either" + echo " downloaded from UCSC Genome Browser or generated by GEM (Derrien et al.," + echo " 2012, PLoS One)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/rseqc/rseqc_bamstat/.config.vsh.yaml b/target/executable/rseqc/rseqc_bamstat/.config.vsh.yaml index 280b60bf..6d73df43 100644 --- a/target/executable/rseqc/rseqc_bamstat/.config.vsh.yaml +++ b/target/executable/rseqc/rseqc_bamstat/.config.vsh.yaml @@ -68,6 +68,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -174,16 +177,16 @@ build_info: engine: "docker|native" output: "target/executable/rseqc/rseqc_bamstat" executable: "target/executable/rseqc/rseqc_bamstat/rseqc_bamstat" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/rseqc/rseqc_bamstat/rseqc_bamstat b/target/executable/rseqc/rseqc_bamstat/rseqc_bamstat index 12df111e..c44379e3 100755 --- a/target/executable/rseqc/rseqc_bamstat/rseqc_bamstat +++ b/target/executable/rseqc/rseqc_bamstat/rseqc_bamstat @@ -2,7 +2,7 @@ # rseqc_bamstat main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,28 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "rseqc_bamstat main" - echo "" - echo "Generate statistics from a bam file." - echo "" - echo "Input:" - echo " -i, --input_file" - echo " type: file, required parameter, file must exist" - echo " Input alignment file in BAM or SAM format." - echo "" - echo " -q, --mapq" - echo " type: integer" - echo " example: 30" - echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" - echo " reads. Default: '30'." - echo "" - echo "Output:" - echo " --output" - echo " type: file, output, file must exist" - echo " Output file (txt) with mapping quality statistics." -} # initialise variables VIASH_MODE='run' @@ -477,9 +455,9 @@ RUN echo "RSeQC bam_stat.py: $(bam_stat.py --version | cut -d' ' -f2-)" > /var/s LABEL org.opencontainers.image.authors="Emma Rousseau" LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_bamstat" -LABEL org.opencontainers.image.created="2024-12-03T10:33:58Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:37Z" LABEL org.opencontainers.image.source="https://github.com/MonashBioinformaticsPlatform/RSeQC" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -594,6 +572,54 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rseqc_bamstat main" + echo "" + echo "Generate statistics from a bam file." + echo "" + echo "Input:" + echo " -i, --input_file" + echo " type: file, required parameter, file must exist" + echo " Input alignment file in BAM or SAM format." + echo "" + echo " -q, --mapq" + echo " type: integer" + echo " example: 30" + echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" + echo " reads. Default: '30'." + echo "" + echo "Output:" + echo " --output" + echo " type: file, output, file must exist" + echo " Output file (txt) with mapping quality statistics." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/rseqc/rseqc_inferexperiment/.config.vsh.yaml b/target/executable/rseqc/rseqc_inferexperiment/.config.vsh.yaml index 16703dc0..b5d69d14 100644 --- a/target/executable/rseqc/rseqc_inferexperiment/.config.vsh.yaml +++ b/target/executable/rseqc/rseqc_inferexperiment/.config.vsh.yaml @@ -96,6 +96,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -200,16 +203,16 @@ build_info: engine: "docker|native" output: "target/executable/rseqc/rseqc_inferexperiment" executable: "target/executable/rseqc/rseqc_inferexperiment/rseqc_inferexperiment" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/rseqc/rseqc_inferexperiment/rseqc_inferexperiment b/target/executable/rseqc/rseqc_inferexperiment/rseqc_inferexperiment index ca87c154..21255b62 100755 --- a/target/executable/rseqc/rseqc_inferexperiment/rseqc_inferexperiment +++ b/target/executable/rseqc/rseqc_inferexperiment/rseqc_inferexperiment @@ -2,7 +2,7 @@ # rseqc_inferexperiment main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,39 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "rseqc_inferexperiment main" - echo "" - echo "Infer strandedness from sequencing reads" - echo "" - echo "Input:" - echo " -i, --input_file" - echo " type: file, required parameter, file must exist" - echo " input alignment file in BAM or SAM format" - echo "" - echo " -r, --refgene" - echo " type: file, required parameter, file must exist" - echo " Reference gene model in bed format" - echo "" - echo "Output:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: \$id.strandedness.txt" - echo " Output file (txt) of strandness report." - echo "" - echo "Options:" - echo " -s, --sample_size" - echo " type: integer" - echo " example: 200000" - echo " Number of reads sampled from SAM/BAM file. Default: 200000" - echo "" - echo " -q, --mapq" - echo " type: integer" - echo " example: 30" - echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" - echo " reads. Default: 30" -} # initialise variables VIASH_MODE='run' @@ -488,9 +455,9 @@ RUN echo "RSeQC - infer_experiment.py: $(infer_experiment.py --version | cut -d' LABEL org.opencontainers.image.authors="Emma Rousseau" LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_inferexperiment" -LABEL org.opencontainers.image.created="2024-12-03T10:33:59Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:36Z" LABEL org.opencontainers.image.source="https://github.com/MonashBioinformaticsPlatform/RSeQC" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -605,6 +572,65 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rseqc_inferexperiment main" + echo "" + echo "Infer strandedness from sequencing reads" + echo "" + echo "Input:" + echo " -i, --input_file" + echo " type: file, required parameter, file must exist" + echo " input alignment file in BAM or SAM format" + echo "" + echo " -r, --refgene" + echo " type: file, required parameter, file must exist" + echo " Reference gene model in bed format" + echo "" + echo "Output:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: \$id.strandedness.txt" + echo " Output file (txt) of strandness report." + echo "" + echo "Options:" + echo " -s, --sample_size" + echo " type: integer" + echo " example: 200000" + echo " Number of reads sampled from SAM/BAM file. Default: 200000" + echo "" + echo " -q, --mapq" + echo " type: integer" + echo " example: 30" + echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" + echo " reads. Default: 30" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/rseqc/rseqc_inner_distance/.config.vsh.yaml b/target/executable/rseqc/rseqc_inner_distance/.config.vsh.yaml index e2c5ed2a..7e003aad 100644 --- a/target/executable/rseqc/rseqc_inner_distance/.config.vsh.yaml +++ b/target/executable/rseqc/rseqc_inner_distance/.config.vsh.yaml @@ -185,6 +185,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -293,16 +296,16 @@ build_info: engine: "docker|native" output: "target/executable/rseqc/rseqc_inner_distance" executable: "target/executable/rseqc/rseqc_inner_distance/rseqc_inner_distance" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/rseqc/rseqc_inner_distance/rseqc_inner_distance b/target/executable/rseqc/rseqc_inner_distance/rseqc_inner_distance index 386ad6f3..79cb0882 100755 --- a/target/executable/rseqc/rseqc_inner_distance/rseqc_inner_distance +++ b/target/executable/rseqc/rseqc_inner_distance/rseqc_inner_distance @@ -2,7 +2,7 @@ # rseqc_inner_distance main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,79 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "rseqc_inner_distance main" - echo "" - echo "Calculate inner distance between read pairs." - echo "" - echo "Input:" - echo " -i, --input_file" - echo " type: file, required parameter, file must exist" - echo " input alignment file in BAM or SAM format" - echo "" - echo " -r, --refgene" - echo " type: file, required parameter, file must exist" - echo " Reference gene model in bed format" - echo "" - echo " -k, --sample_size" - echo " type: integer" - echo " example: 1000000" - echo " Numer of reads sampled from SAM/BAM file, default = 1000000." - echo "" - echo " -q, --mapq" - echo " type: integer" - echo " example: 30" - echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" - echo " reads, default=30." - echo "" - echo " -l, --lower_bound" - echo " type: integer" - echo " example: -250" - echo " Lower bound of inner distance (bp). This option is used for ploting" - echo " histograme, default=-250." - echo "" - echo " -u, --upper_bound" - echo " type: integer" - echo " example: 250" - echo " Upper bound of inner distance (bp). This option is used for ploting" - echo " histograme, default=250." - echo "" - echo " -s, --step" - echo " type: integer" - echo " example: 5" - echo " Step size (bp) of histograme. This option is used for plotting" - echo " histogram, default=5." - echo "" - echo "Output:" - echo " -o, --output_prefix" - echo " type: string, required parameter" - echo " Rrefix of output files." - echo "" - echo " --output_stats" - echo " type: file, output, file must exist" - echo " output file (txt) with summary statistics of inner distances of paired" - echo " reads" - echo "" - echo " --output_dist" - echo " type: file, output, file must exist" - echo " output file (txt) with inner distances of all paired reads" - echo "" - echo " --output_freq" - echo " type: file, output, file must exist" - echo " output file (txt) with frequencies of inner distances of all paired" - echo " reads" - echo "" - echo " --output_plot" - echo " type: file, output, file must exist" - echo " output file (pdf) with histogram plot of of inner distances of all" - echo " paired reads" - echo "" - echo " --output_plot_r" - echo " type: file, output, file must exist" - echo " output file (R) with script of histogram plot of of inner distances of" - echo " all paired reads" -} # initialise variables VIASH_MODE='run' @@ -532,9 +459,9 @@ RUN echo "RSeQC - inner_distance.py: $(inner_distance.py --version | cut -d' ' - LABEL org.opencontainers.image.authors="Emma Rousseau" LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_inner_distance" -LABEL org.opencontainers.image.created="2024-12-03T10:33:58Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:36Z" LABEL org.opencontainers.image.source="https://github.com/MonashBioinformaticsPlatform/RSeQC" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -649,6 +576,105 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rseqc_inner_distance main" + echo "" + echo "Calculate inner distance between read pairs." + echo "" + echo "Input:" + echo " -i, --input_file" + echo " type: file, required parameter, file must exist" + echo " input alignment file in BAM or SAM format" + echo "" + echo " -r, --refgene" + echo " type: file, required parameter, file must exist" + echo " Reference gene model in bed format" + echo "" + echo " -k, --sample_size" + echo " type: integer" + echo " example: 1000000" + echo " Numer of reads sampled from SAM/BAM file, default = 1000000." + echo "" + echo " -q, --mapq" + echo " type: integer" + echo " example: 30" + echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" + echo " reads, default=30." + echo "" + echo " -l, --lower_bound" + echo " type: integer" + echo " example: -250" + echo " Lower bound of inner distance (bp). This option is used for ploting" + echo " histograme, default=-250." + echo "" + echo " -u, --upper_bound" + echo " type: integer" + echo " example: 250" + echo " Upper bound of inner distance (bp). This option is used for ploting" + echo " histograme, default=250." + echo "" + echo " -s, --step" + echo " type: integer" + echo " example: 5" + echo " Step size (bp) of histograme. This option is used for plotting" + echo " histogram, default=5." + echo "" + echo "Output:" + echo " -o, --output_prefix" + echo " type: string, required parameter" + echo " Rrefix of output files." + echo "" + echo " --output_stats" + echo " type: file, output, file must exist" + echo " output file (txt) with summary statistics of inner distances of paired" + echo " reads" + echo "" + echo " --output_dist" + echo " type: file, output, file must exist" + echo " output file (txt) with inner distances of all paired reads" + echo "" + echo " --output_freq" + echo " type: file, output, file must exist" + echo " output file (txt) with frequencies of inner distances of all paired" + echo " reads" + echo "" + echo " --output_plot" + echo " type: file, output, file must exist" + echo " output file (pdf) with histogram plot of of inner distances of all" + echo " paired reads" + echo "" + echo " --output_plot_r" + echo " type: file, output, file must exist" + echo " output file (R) with script of histogram plot of of inner distances of" + echo " all paired reads" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/salmon/salmon_index/.config.vsh.yaml b/target/executable/salmon/salmon_index/.config.vsh.yaml index 9a9aa6db..6c0cf5ea 100644 --- a/target/executable/salmon/salmon_index/.config.vsh.yaml +++ b/target/executable/salmon/salmon_index/.config.vsh.yaml @@ -176,6 +176,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -276,16 +279,16 @@ build_info: engine: "docker|native" output: "target/executable/salmon/salmon_index" executable: "target/executable/salmon/salmon_index/salmon_index" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/salmon/salmon_index/salmon_index b/target/executable/salmon/salmon_index/salmon_index index 5bb7aeb9..9eb84369 100755 --- a/target/executable/salmon/salmon_index/salmon_index +++ b/target/executable/salmon/salmon_index/salmon_index @@ -2,7 +2,7 @@ # salmon_index main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,100 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "salmon_index main" - echo "" - echo "Salmon is a tool for wicked-fast transcript quantification from RNA-seq data. It" - echo "can either make use of pre-computed alignments (in the form of a SAM/BAM file)" - echo "to the transcripts rather than the raw reads, or can be run in the mapping-based" - echo "mode. This component creates a salmon index for the transcriptome to use Salmon" - echo "in the mapping-based mode. It is generally recommend that you build a" - echo "decoy-aware transcriptome file. This is done using the entire genome of the" - echo "organism as the decoy sequence by concatenating the genome to the end of the" - echo "transcriptome to be indexed and populating the decoys.txt file with the" - echo "chromosome names." - echo "" - echo "Inputs:" - echo " --genome" - echo " type: file, file must exist" - echo " example: genome.fasta" - echo " Genome of the organism to prepare the set of decoy sequences. Required" - echo " to build decoy-aware transcriptome." - echo "" - echo " -t, --transcripts" - echo " type: file, required parameter, file must exist" - echo " example: transcriptome.fasta" - echo " Transcript fasta file." - echo "" - echo " -k, --kmer_len" - echo " type: integer" - echo " example: 31" - echo " The size of k-mers that should be used for the quasi index." - echo "" - echo " --gencode" - echo " type: boolean_true" - echo " This flag will expect the input transcript fasta to be in GENCODE" - echo " format, and will split the transcript name at the first '|' character." - echo " These reduced names will be used in the output and when looking for" - echo " these transcripts in a gene to transcript GTF." - echo "" - echo " --features" - echo " type: boolean_true" - echo " This flag will expect the input reference to be in the tsv file format," - echo " and will split the feature name at the first 'tab' character. These" - echo " reduced names will be used in the output and when looking for the" - echo " sequence of the features.GTF." - echo "" - echo " --keep_duplicates" - echo " type: boolean_true" - echo " This flag will disable the default indexing behavior of discarding" - echo " sequence-identical duplicate transcripts. If this flag is passed, then" - echo " duplicate transcripts that appear in the input will be retained and" - echo " quantified separately." - echo "" - echo " --keep_fixed_fasta" - echo " type: boolean_true" - echo " Retain the fixed fasta file (without short transcripts and duplicates," - echo " clipped, etc.) generated during indexing." - echo "" - echo " -f, --filter_size" - echo " type: integer" - echo " example: -1" - echo " The size of the Bloom filter that will be used by TwoPaCo during" - echo " indexing. The filter will be of size 2^{filter_size}. The default value" - echo " of -1 means that the filter size will be automatically set based on the" - echo " number of distinct k-mers in the input, as estimated by nthll." - echo "" - echo " --sparse" - echo " type: boolean_true" - echo " Build the index using a sparse sampling of k-mer positions This will" - echo " require less memory (especially during quantification), but will take" - echo " longer to construct and can slow down mapping / alignment." - echo "" - echo " -d, --decoys" - echo " type: file, file must exist" - echo " example: decoys.txt" - echo " Treat these sequences ids from the reference as the decoys that may have" - echo " sequence homologous to some known transcript. For example in case of the" - echo " genome, provide a list of chromosome names (one per line)." - echo "" - echo " --no_clip" - echo " type: boolean_true" - echo " Don't clip poly-A tails from the ends of target sequences." - echo "" - echo " -n, --type" - echo " type: string" - echo " example: puff" - echo " The type of index to build; the only option is \"puff\" in this version of" - echo " salmon." - echo "" - echo "Output:" - echo " -i, --index" - echo " type: file, required parameter, output, file must exist" - echo " example: Salmon_index" - echo " Salmon index" -} # initialise variables VIASH_MODE='run' @@ -546,9 +452,9 @@ RUN salmon index -v 2>&1 | sed 's/salmon \([0-9.]*\)/salmon: \1/' > /var/softwar LABEL org.opencontainers.image.authors="Sai Nirmayi Yasa" LABEL org.opencontainers.image.description="Companion container for running component salmon salmon_index" -LABEL org.opencontainers.image.created="2024-12-03T10:34:02Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:41Z" LABEL org.opencontainers.image.source="https://github.com/COMBINE-lab/salmon" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -663,6 +569,126 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "salmon_index main" + echo "" + echo "Salmon is a tool for wicked-fast transcript quantification from RNA-seq data. It" + echo "can either make use of pre-computed alignments (in the form of a SAM/BAM file)" + echo "to the transcripts rather than the raw reads, or can be run in the mapping-based" + echo "mode. This component creates a salmon index for the transcriptome to use Salmon" + echo "in the mapping-based mode. It is generally recommend that you build a" + echo "decoy-aware transcriptome file. This is done using the entire genome of the" + echo "organism as the decoy sequence by concatenating the genome to the end of the" + echo "transcriptome to be indexed and populating the decoys.txt file with the" + echo "chromosome names." + echo "" + echo "Inputs:" + echo " --genome" + echo " type: file, file must exist" + echo " example: genome.fasta" + echo " Genome of the organism to prepare the set of decoy sequences. Required" + echo " to build decoy-aware transcriptome." + echo "" + echo " -t, --transcripts" + echo " type: file, required parameter, file must exist" + echo " example: transcriptome.fasta" + echo " Transcript fasta file." + echo "" + echo " -k, --kmer_len" + echo " type: integer" + echo " example: 31" + echo " The size of k-mers that should be used for the quasi index." + echo "" + echo " --gencode" + echo " type: boolean_true" + echo " This flag will expect the input transcript fasta to be in GENCODE" + echo " format, and will split the transcript name at the first '|' character." + echo " These reduced names will be used in the output and when looking for" + echo " these transcripts in a gene to transcript GTF." + echo "" + echo " --features" + echo " type: boolean_true" + echo " This flag will expect the input reference to be in the tsv file format," + echo " and will split the feature name at the first 'tab' character. These" + echo " reduced names will be used in the output and when looking for the" + echo " sequence of the features.GTF." + echo "" + echo " --keep_duplicates" + echo " type: boolean_true" + echo " This flag will disable the default indexing behavior of discarding" + echo " sequence-identical duplicate transcripts. If this flag is passed, then" + echo " duplicate transcripts that appear in the input will be retained and" + echo " quantified separately." + echo "" + echo " --keep_fixed_fasta" + echo " type: boolean_true" + echo " Retain the fixed fasta file (without short transcripts and duplicates," + echo " clipped, etc.) generated during indexing." + echo "" + echo " -f, --filter_size" + echo " type: integer" + echo " example: -1" + echo " The size of the Bloom filter that will be used by TwoPaCo during" + echo " indexing. The filter will be of size 2^{filter_size}. The default value" + echo " of -1 means that the filter size will be automatically set based on the" + echo " number of distinct k-mers in the input, as estimated by nthll." + echo "" + echo " --sparse" + echo " type: boolean_true" + echo " Build the index using a sparse sampling of k-mer positions This will" + echo " require less memory (especially during quantification), but will take" + echo " longer to construct and can slow down mapping / alignment." + echo "" + echo " -d, --decoys" + echo " type: file, file must exist" + echo " example: decoys.txt" + echo " Treat these sequences ids from the reference as the decoys that may have" + echo " sequence homologous to some known transcript. For example in case of the" + echo " genome, provide a list of chromosome names (one per line)." + echo "" + echo " --no_clip" + echo " type: boolean_true" + echo " Don't clip poly-A tails from the ends of target sequences." + echo "" + echo " -n, --type" + echo " type: string" + echo " example: puff" + echo " The type of index to build; the only option is \"puff\" in this version of" + echo " salmon." + echo "" + echo "Output:" + echo " -i, --index" + echo " type: file, required parameter, output, file must exist" + echo " example: Salmon_index" + echo " Salmon index" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/salmon/salmon_quant/.config.vsh.yaml b/target/executable/salmon/salmon_quant/.config.vsh.yaml index aa5c90c5..859a4d82 100644 --- a/target/executable/salmon/salmon_quant/.config.vsh.yaml +++ b/target/executable/salmon/salmon_quant/.config.vsh.yaml @@ -1072,6 +1072,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -1172,16 +1175,16 @@ build_info: engine: "docker|native" output: "target/executable/salmon/salmon_quant" executable: "target/executable/salmon/salmon_quant/salmon_quant" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/salmon/salmon_quant/salmon_quant b/target/executable/salmon/salmon_quant/salmon_quant index 8339019b..b999158d 100755 --- a/target/executable/salmon/salmon_quant/salmon_quant +++ b/target/executable/salmon/salmon_quant/salmon_quant @@ -2,7 +2,7 @@ # salmon_quant main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,6 +172,404 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM quay.io/biocontainers/salmon:1.10.2--hecfa306_0 +ENTRYPOINT [] +RUN salmon index -v 2>&1 | sed 's/salmon \([0-9.]*\)/salmon: \1/' > /var/software_versions.txt + +LABEL org.opencontainers.image.authors="Sai Nirmayi Yasa" +LABEL org.opencontainers.image.description="Companion container for running component salmon salmon_quant" +LABEL org.opencontainers.image.created="2025-03-06T09:19:41Z" +LABEL org.opencontainers.image.source="https://github.com/COMBINE-lab/salmon" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "salmon_quant main" @@ -887,404 +1285,32 @@ function ViashHelp { echo " but setting it large enough to accommodate all of the mapped read can" echo " substantially speed up inference on \"small\" files that contain only a" echo " few million reads." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? : whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM quay.io/biocontainers/salmon:1.10.2--hecfa306_0 -ENTRYPOINT [] -RUN salmon index -v 2>&1 | sed 's/salmon \([0-9.]*\)/salmon: \1/' > /var/software_versions.txt - -LABEL org.opencontainers.image.authors="Sai Nirmayi Yasa" -LABEL org.opencontainers.image.description="Companion container for running component salmon salmon_quant" -LABEL org.opencontainers.image.created="2024-12-03T10:34:01Z" -LABEL org.opencontainers.image.source="https://github.com/COMBINE-lab/salmon" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/samtools/samtools_collate/.config.vsh.yaml b/target/executable/samtools/samtools_collate/.config.vsh.yaml index 8a00ae2f..86e95721 100644 --- a/target/executable/samtools/samtools_collate/.config.vsh.yaml +++ b/target/executable/samtools/samtools_collate/.config.vsh.yaml @@ -158,6 +158,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -263,16 +266,16 @@ build_info: engine: "docker|native" output: "target/executable/samtools/samtools_collate" executable: "target/executable/samtools/samtools_collate/samtools_collate" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/samtools/samtools_collate/samtools_collate b/target/executable/samtools/samtools_collate/samtools_collate index fd96d64e..4ab294f1 100755 --- a/target/executable/samtools/samtools_collate/samtools_collate +++ b/target/executable/samtools/samtools_collate/samtools_collate @@ -2,7 +2,7 @@ # samtools_collate main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,72 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "samtools_collate main" - echo "" - echo "Shuffles and groups reads in SAM/BAM/CRAM files together by their names." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " The input BAM file." - echo "" - echo " --reference" - echo " type: file, file must exist" - echo " Reference sequence FASTA FILE." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " The output filename." - echo "" - echo "Options:" - echo " -u, --uncompressed" - echo " type: boolean_true" - echo " Output uncompressed BAM." - echo "" - echo " -f, --fast" - echo " type: boolean_true" - echo " Fast mode, only primary alignments." - echo "" - echo " -r, --working_reads" - echo " type: integer" - echo " default: 10000" - echo " Working reads stored (for use with -f)." - echo "" - echo " -l, --compression" - echo " type: integer" - echo " default: 1" - echo " Compression level." - echo "" - echo " -n, --nb_tmp_files" - echo " type: integer" - echo " default: 64" - echo " Number of temporary files." - echo "" - echo " -T, --tmp_prefix" - echo " type: string" - echo " Write temporary files to PREFIX.nnnn.bam." - echo "" - echo " --no_pg" - echo " type: boolean_true" - echo " Do not add a PG line." - echo "" - echo " --input_fmt_option" - echo " type: string" - echo " Specify a single input file format option in the form of OPTION or" - echo " OPTION=VALUE." - echo "" - echo " --output_fmt" - echo " type: string" - echo " Specify output format (SAM, BAM, CRAM)." - echo "" - echo " --output_fmt_option" - echo " type: string" - echo " Specify a single output file format option in the form of OPTION or" - echo " OPTION=VALUE." -} # initialise variables VIASH_MODE='run' @@ -519,9 +453,9 @@ sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt LABEL org.opencontainers.image.authors="Emma Rousseau" LABEL org.opencontainers.image.description="Companion container for running component samtools samtools_collate" -LABEL org.opencontainers.image.created="2024-12-03T10:33:58Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:39Z" LABEL org.opencontainers.image.source="https://github.com/samtools/samtools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -636,6 +570,98 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "samtools_collate main" + echo "" + echo "Shuffles and groups reads in SAM/BAM/CRAM files together by their names." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " The input BAM file." + echo "" + echo " --reference" + echo " type: file, file must exist" + echo " Reference sequence FASTA FILE." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " The output filename." + echo "" + echo "Options:" + echo " -u, --uncompressed" + echo " type: boolean_true" + echo " Output uncompressed BAM." + echo "" + echo " -f, --fast" + echo " type: boolean_true" + echo " Fast mode, only primary alignments." + echo "" + echo " -r, --working_reads" + echo " type: integer" + echo " default: 10000" + echo " Working reads stored (for use with -f)." + echo "" + echo " -l, --compression" + echo " type: integer" + echo " default: 1" + echo " Compression level." + echo "" + echo " -n, --nb_tmp_files" + echo " type: integer" + echo " default: 64" + echo " Number of temporary files." + echo "" + echo " -T, --tmp_prefix" + echo " type: string" + echo " Write temporary files to PREFIX.nnnn.bam." + echo "" + echo " --no_pg" + echo " type: boolean_true" + echo " Do not add a PG line." + echo "" + echo " --input_fmt_option" + echo " type: string" + echo " Specify a single input file format option in the form of OPTION or" + echo " OPTION=VALUE." + echo "" + echo " --output_fmt" + echo " type: string" + echo " Specify output format (SAM, BAM, CRAM)." + echo "" + echo " --output_fmt_option" + echo " type: string" + echo " Specify a single output file format option in the form of OPTION or" + echo " OPTION=VALUE." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/samtools/samtools_faidx/.config.vsh.yaml b/target/executable/samtools/samtools_faidx/.config.vsh.yaml index f0645096..8b83baae 100644 --- a/target/executable/samtools/samtools_faidx/.config.vsh.yaml +++ b/target/executable/samtools/samtools_faidx/.config.vsh.yaml @@ -139,6 +139,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -242,16 +245,16 @@ build_info: engine: "docker|native" output: "target/executable/samtools/samtools_faidx" executable: "target/executable/samtools/samtools_faidx/samtools_faidx" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/samtools/samtools_faidx/samtools_faidx b/target/executable/samtools/samtools_faidx/samtools_faidx index e3782810..c5fdc0b1 100755 --- a/target/executable/samtools/samtools_faidx/samtools_faidx +++ b/target/executable/samtools/samtools_faidx/samtools_faidx @@ -2,7 +2,7 @@ # samtools_faidx main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,65 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "samtools_faidx main" - echo "" - echo "Indexes FASTA files to enable random access to fasta and fastq files." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, file must exist" - echo " FASTA input file." - echo "" - echo " -n, --length" - echo " type: integer" - echo " default: 60" - echo " Length for FASTA sequence line wrapping. If zero, this means do not" - echo " line wrap. Defaults to the line length in the input file." - echo "" - echo " -r, --region_file" - echo " type: file, file must exist" - echo " File of regions. Format is chr:from-to. One per line." - echo " Must be used with --output to avoid sending output to stdout." - echo "" - echo "Options:" - echo " --continue" - echo " type: boolean_true" - echo " Continue working if a non-existent region is requested." - echo "" - echo " -i, --reverse_complement" - echo " type: boolean_true" - echo " Reverse complement sequences." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.fasta" - echo " Write output to file." - echo "" - echo " --mark_strand" - echo " type: string" - echo " default: rc" - echo " Add strand indicator to sequence name. Options are:" - echo " [ rc, no, sign, custom,, ]" - echo "" - echo " --fai_idx" - echo " type: file, output, file must exist" - echo " example: file.fa.fai" - echo " Read/Write to specified index file (default file.fa.fai)." - echo "" - echo " --gzi_idx" - echo " type: file, output, file must exist" - echo " example: file.fa.gz.gzi" - echo " Read/Write to specified compressed file index (used with .gz files," - echo " default file.fa.gz.gzi)." - echo "" - echo " --fastq" - echo " type: boolean_true" - echo " Read FASTQ files and output extracted sequences in FASTQ format. Same as" - echo " using samtools fqidx." -} # initialise variables VIASH_MODE='run' @@ -512,9 +453,9 @@ sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt LABEL org.opencontainers.image.authors="Emma Rousseau" LABEL org.opencontainers.image.description="Companion container for running component samtools samtools_faidx" -LABEL org.opencontainers.image.created="2024-12-03T10:34:00Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:37Z" LABEL org.opencontainers.image.source="https://github.com/samtools/samtools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -629,6 +570,91 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "samtools_faidx main" + echo "" + echo "Indexes FASTA files to enable random access to fasta and fastq files." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, file must exist" + echo " FASTA input file." + echo "" + echo " -n, --length" + echo " type: integer" + echo " default: 60" + echo " Length for FASTA sequence line wrapping. If zero, this means do not" + echo " line wrap. Defaults to the line length in the input file." + echo "" + echo " -r, --region_file" + echo " type: file, file must exist" + echo " File of regions. Format is chr:from-to. One per line." + echo " Must be used with --output to avoid sending output to stdout." + echo "" + echo "Options:" + echo " --continue" + echo " type: boolean_true" + echo " Continue working if a non-existent region is requested." + echo "" + echo " -i, --reverse_complement" + echo " type: boolean_true" + echo " Reverse complement sequences." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.fasta" + echo " Write output to file." + echo "" + echo " --mark_strand" + echo " type: string" + echo " default: rc" + echo " Add strand indicator to sequence name. Options are:" + echo " [ rc, no, sign, custom,, ]" + echo "" + echo " --fai_idx" + echo " type: file, output, file must exist" + echo " example: file.fa.fai" + echo " Read/Write to specified index file (default file.fa.fai)." + echo "" + echo " --gzi_idx" + echo " type: file, output, file must exist" + echo " example: file.fa.gz.gzi" + echo " Read/Write to specified compressed file index (used with .gz files," + echo " default file.fa.gz.gzi)." + echo "" + echo " --fastq" + echo " type: boolean_true" + echo " Read FASTQ files and output extracted sequences in FASTQ format. Same as" + echo " using samtools fqidx." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/samtools/samtools_fasta/.config.vsh.yaml b/target/executable/samtools/samtools_fasta/.config.vsh.yaml index 4cdf517e..337aa2e8 100644 --- a/target/executable/samtools/samtools_fasta/.config.vsh.yaml +++ b/target/executable/samtools/samtools_fasta/.config.vsh.yaml @@ -328,6 +328,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -432,16 +435,16 @@ build_info: engine: "docker|native" output: "target/executable/samtools/samtools_fasta" executable: "target/executable/samtools/samtools_fasta/samtools_fasta" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/samtools/samtools_fasta/samtools_fasta b/target/executable/samtools/samtools_fasta/samtools_fasta index 22c425f0..cc12c0fe 100755 --- a/target/executable/samtools/samtools_fasta/samtools_fasta +++ b/target/executable/samtools/samtools_fasta/samtools_fasta @@ -2,7 +2,7 @@ # samtools_fasta main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,178 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "samtools_fasta main" - echo "" - echo "Converts a SAM, BAM or CRAM to FASTA format." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " input SAM/BAM/CRAM file" - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " output FASTA file" - echo "" - echo "Options:" - echo " -n, --no_suffix" - echo " type: boolean_true" - echo " By default, either '/1' or '/2' is added to the end of read names where" - echo " the corresponding" - echo " READ1 or READ2 FLAG bit is set. Using -n causes read names to be left as" - echo " they are." - echo "" - echo " -N, --suffix" - echo " type: boolean_true" - echo " Always add either '/1' or '/2' to the end of read names even when put" - echo " into different files." - echo "" - echo " -O, --use_oq" - echo " type: boolean_true" - echo " Use quality values from OQ tags in preference to standard quality string" - echo " if available." - echo "" - echo " -s, --singleton" - echo " type: file, file must exist" - echo " write singleton reads to FILE." - echo "" - echo " -t, --copy_tags" - echo " type: boolean_true" - echo " Copy RG, BC and QT tags to the FASTA header line, if they exist." - echo "" - echo " -T, --copy_tags_list" - echo " type: string" - echo " Specify a comma-separated list of tags to copy to the FASTA header line," - echo " if they exist." - echo " TAGLIST can be blank or \`*\` to indicate all tags should be copied to the" - echo " output. If using \`*\`," - echo " be careful to quote it to avoid unwanted shell expansion." - echo "" - echo " -1, --read1" - echo " type: file, output, file must exist" - echo " Write reads with the READ1 FLAG set (and READ2 not set) to FILE instead" - echo " of outputting them." - echo " If the -s option is used, only paired reads will be written to this" - echo " file." - echo "" - echo " -2, --read2" - echo " type: file, output, file must exist" - echo " Write reads with the READ2 FLAG set (and READ1 not set) to FILE instead" - echo " of outputting them." - echo " If the -s option is used, only paired reads will be written to this" - echo " file." - echo "" - echo " -o, --output_reads" - echo " type: file, output, file must exist" - echo " Write reads with either READ1 FLAG or READ2 flag set to FILE instead of" - echo " outputting them to stdout." - echo " This is equivalent to -1 FILE -2 FILE." - echo "" - echo " 0, --output_reads_both" - echo " type: file, output, file must exist" - echo " Write reads where the READ1 and READ2 FLAG bits set are either both set" - echo " or both unset to FILE" - echo " instead of outputting them." - echo "" - echo " -f, --filter_flags" - echo " type: integer" - echo " example: 0" - echo " Only output alignments with all bits set in INT present in the FLAG" - echo " field. INT can be specified" - echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by" - echo " beginning with '0'" - echo " (i.e. /^0[0-7]+/). Default: \`0\`." - echo "" - echo " -F, --excl_flags" - echo " type: string" - echo " example: 0x900" - echo " Do not output alignments with any bits set in INT present in the FLAG" - echo " field. INT can be specified" - echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by" - echo " beginning with '0'" - echo " (i.e. /^0[0-7]+/). This defaults to 0x900 representing filtering of" - echo " secondary and" - echo " supplementary alignments. Default: \`0x900\`." - echo "" - echo " --rf, --incl_flags" - echo " type: string" - echo " example: 0" - echo " Only output alignments with any bits set in INT present in the FLAG" - echo " field. INT can be specified" - echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/), in octal by" - echo " beginning with '0'" - echo " (i.e. /^0[0-7]+/), as a decimal number not beginning with '0' or as a" - echo " comma-separated list of" - echo " flag names. Default: \`0\`." - echo "" - echo " -G, --excl_flags_all" - echo " type: integer" - echo " example: 0" - echo " Only EXCLUDE reads with all of the bits set in INT present in the FLAG" - echo " field. INT can be specified" - echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by" - echo " beginning with '0' (i.e. /^0[0-7]+/)." - echo " Default: \`0\`." - echo "" - echo " -d, --aux_tag" - echo " type: string" - echo " Only output alignments containing an auxiliary tag matching both TAG and" - echo " VAL. If VAL is omitted" - echo " then any value is accepted. The tag types supported are i, f, Z, A and" - echo " H. \"B\" arrays are not" - echo " supported. This is comparable to the method used in samtools view --tag." - echo " The option may be specified" - echo " multiple times and is equivalent to using the --aux_tag_file option." - echo "" - echo " -D, --aux_tag_file" - echo " type: string" - echo " Only output alignments containing an auxiliary tag matching TAG and" - echo " having a value listed in FILE." - echo " The format of the file is one line per value. This is equivalent to" - echo " specifying --aux_tag multiple times." - echo "" - echo " -i, --casava" - echo " type: boolean_true" - echo " add Illumina Casava 1.8 format entry to header (eg 1:N:0:ATCACG)" - echo "" - echo " -c, --compression" - echo " type: integer" - echo " example: 0" - echo " set compression level when writing gz or bgzf fasta files." - echo "" - echo " --i1, --index1" - echo " type: file, file must exist" - echo " write first index reads to FILE." - echo "" - echo " --i2, --index2" - echo " type: file, file must exist" - echo " write second index reads to FILE." - echo "" - echo " --barcode_tag" - echo " type: string" - echo " example: BC" - echo " Auxiliary tag to find index reads in. Default: \`BC\`." - echo "" - echo " --quality_tag" - echo " type: string" - echo " example: QT" - echo " Auxiliary tag to find index quality in. Default: \`QT\`." - echo "" - echo " --index_format" - echo " type: string" - echo " string to describe how to parse the barcode and quality tags. For" - echo " example:" - echo " * \`i14i8\`: the first 14 characters are index 1, the next 8 characters" - echo " are index 2." - echo " * \`n8i14\`: ignore the first 8 characters, and use the next 14 characters" - echo " for index 1." - echo " If the tag contains a separator, then the numeric part can be replaced" - echo " with\`*\` to mean" - echo " 'read until the separator or end of tag', for example: \`n*i*\`." -} # initialise variables VIASH_MODE='run' @@ -625,9 +453,9 @@ sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt LABEL org.opencontainers.image.authors="Emma Rousseau" LABEL org.opencontainers.image.description="Companion container for running component samtools samtools_fasta" -LABEL org.opencontainers.image.created="2024-12-03T10:34:00Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:39Z" LABEL org.opencontainers.image.source="https://github.com/samtools/samtools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -742,6 +570,204 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "samtools_fasta main" + echo "" + echo "Converts a SAM, BAM or CRAM to FASTA format." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " input SAM/BAM/CRAM file" + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " output FASTA file" + echo "" + echo "Options:" + echo " -n, --no_suffix" + echo " type: boolean_true" + echo " By default, either '/1' or '/2' is added to the end of read names where" + echo " the corresponding" + echo " READ1 or READ2 FLAG bit is set. Using -n causes read names to be left as" + echo " they are." + echo "" + echo " -N, --suffix" + echo " type: boolean_true" + echo " Always add either '/1' or '/2' to the end of read names even when put" + echo " into different files." + echo "" + echo " -O, --use_oq" + echo " type: boolean_true" + echo " Use quality values from OQ tags in preference to standard quality string" + echo " if available." + echo "" + echo " -s, --singleton" + echo " type: file, file must exist" + echo " write singleton reads to FILE." + echo "" + echo " -t, --copy_tags" + echo " type: boolean_true" + echo " Copy RG, BC and QT tags to the FASTA header line, if they exist." + echo "" + echo " -T, --copy_tags_list" + echo " type: string" + echo " Specify a comma-separated list of tags to copy to the FASTA header line," + echo " if they exist." + echo " TAGLIST can be blank or \`*\` to indicate all tags should be copied to the" + echo " output. If using \`*\`," + echo " be careful to quote it to avoid unwanted shell expansion." + echo "" + echo " -1, --read1" + echo " type: file, output, file must exist" + echo " Write reads with the READ1 FLAG set (and READ2 not set) to FILE instead" + echo " of outputting them." + echo " If the -s option is used, only paired reads will be written to this" + echo " file." + echo "" + echo " -2, --read2" + echo " type: file, output, file must exist" + echo " Write reads with the READ2 FLAG set (and READ1 not set) to FILE instead" + echo " of outputting them." + echo " If the -s option is used, only paired reads will be written to this" + echo " file." + echo "" + echo " -o, --output_reads" + echo " type: file, output, file must exist" + echo " Write reads with either READ1 FLAG or READ2 flag set to FILE instead of" + echo " outputting them to stdout." + echo " This is equivalent to -1 FILE -2 FILE." + echo "" + echo " 0, --output_reads_both" + echo " type: file, output, file must exist" + echo " Write reads where the READ1 and READ2 FLAG bits set are either both set" + echo " or both unset to FILE" + echo " instead of outputting them." + echo "" + echo " -f, --filter_flags" + echo " type: integer" + echo " example: 0" + echo " Only output alignments with all bits set in INT present in the FLAG" + echo " field. INT can be specified" + echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by" + echo " beginning with '0'" + echo " (i.e. /^0[0-7]+/). Default: \`0\`." + echo "" + echo " -F, --excl_flags" + echo " type: string" + echo " example: 0x900" + echo " Do not output alignments with any bits set in INT present in the FLAG" + echo " field. INT can be specified" + echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by" + echo " beginning with '0'" + echo " (i.e. /^0[0-7]+/). This defaults to 0x900 representing filtering of" + echo " secondary and" + echo " supplementary alignments. Default: \`0x900\`." + echo "" + echo " --rf, --incl_flags" + echo " type: string" + echo " example: 0" + echo " Only output alignments with any bits set in INT present in the FLAG" + echo " field. INT can be specified" + echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/), in octal by" + echo " beginning with '0'" + echo " (i.e. /^0[0-7]+/), as a decimal number not beginning with '0' or as a" + echo " comma-separated list of" + echo " flag names. Default: \`0\`." + echo "" + echo " -G, --excl_flags_all" + echo " type: integer" + echo " example: 0" + echo " Only EXCLUDE reads with all of the bits set in INT present in the FLAG" + echo " field. INT can be specified" + echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by" + echo " beginning with '0' (i.e. /^0[0-7]+/)." + echo " Default: \`0\`." + echo "" + echo " -d, --aux_tag" + echo " type: string" + echo " Only output alignments containing an auxiliary tag matching both TAG and" + echo " VAL. If VAL is omitted" + echo " then any value is accepted. The tag types supported are i, f, Z, A and" + echo " H. \"B\" arrays are not" + echo " supported. This is comparable to the method used in samtools view --tag." + echo " The option may be specified" + echo " multiple times and is equivalent to using the --aux_tag_file option." + echo "" + echo " -D, --aux_tag_file" + echo " type: string" + echo " Only output alignments containing an auxiliary tag matching TAG and" + echo " having a value listed in FILE." + echo " The format of the file is one line per value. This is equivalent to" + echo " specifying --aux_tag multiple times." + echo "" + echo " -i, --casava" + echo " type: boolean_true" + echo " add Illumina Casava 1.8 format entry to header (eg 1:N:0:ATCACG)" + echo "" + echo " -c, --compression" + echo " type: integer" + echo " example: 0" + echo " set compression level when writing gz or bgzf fasta files." + echo "" + echo " --i1, --index1" + echo " type: file, file must exist" + echo " write first index reads to FILE." + echo "" + echo " --i2, --index2" + echo " type: file, file must exist" + echo " write second index reads to FILE." + echo "" + echo " --barcode_tag" + echo " type: string" + echo " example: BC" + echo " Auxiliary tag to find index reads in. Default: \`BC\`." + echo "" + echo " --quality_tag" + echo " type: string" + echo " example: QT" + echo " Auxiliary tag to find index quality in. Default: \`QT\`." + echo "" + echo " --index_format" + echo " type: string" + echo " string to describe how to parse the barcode and quality tags. For" + echo " example:" + echo " * \`i14i8\`: the first 14 characters are index 1, the next 8 characters" + echo " are index 2." + echo " * \`n8i14\`: ignore the first 8 characters, and use the next 14 characters" + echo " for index 1." + echo " If the tag contains a separator, then the numeric part can be replaced" + echo " with\`*\` to mean" + echo " 'read until the separator or end of tag', for example: \`n*i*\`." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/samtools/samtools_fastq/.config.vsh.yaml b/target/executable/samtools/samtools_fastq/.config.vsh.yaml index 64d1ea21..a157fd0c 100644 --- a/target/executable/samtools/samtools_fastq/.config.vsh.yaml +++ b/target/executable/samtools/samtools_fastq/.config.vsh.yaml @@ -328,6 +328,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -432,16 +435,16 @@ build_info: engine: "docker|native" output: "target/executable/samtools/samtools_fastq" executable: "target/executable/samtools/samtools_fastq/samtools_fastq" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/samtools/samtools_fastq/samtools_fastq b/target/executable/samtools/samtools_fastq/samtools_fastq index f34a74ec..35a89d58 100755 --- a/target/executable/samtools/samtools_fastq/samtools_fastq +++ b/target/executable/samtools/samtools_fastq/samtools_fastq @@ -2,7 +2,7 @@ # samtools_fastq main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,179 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "samtools_fastq main" - echo "" - echo "Converts a SAM, BAM or CRAM to FASTQ format." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " input SAM/BAM/CRAM file" - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " output FASTQ file" - echo "" - echo "Options:" - echo " -n, --no_suffix" - echo " type: boolean_true" - echo " By default, either '/1' or '/2' is added to the end of read names where" - echo " the corresponding" - echo " READ1 or READ2 FLAG bit is set. Using -n causes read names to be left as" - echo " they are." - echo "" - echo " -N, --suffix" - echo " type: boolean_true" - echo " Always add either '/1' or '/2' to the end of read names even when put" - echo " into different files." - echo "" - echo " -O, --use_oq" - echo " type: boolean_true" - echo " Use quality values from OQ tags in preference to standard quality string" - echo " if available." - echo "" - echo " -s, --singleton" - echo " type: file, file must exist" - echo " write singleton reads to FILE." - echo "" - echo " -t, --copy_tags" - echo " type: boolean_true" - echo " Copy RG, BC and QT tags to the FASTQ header line, if they exist." - echo "" - echo " -T, --copy_tags_list" - echo " type: string" - echo " Specify a comma-separated list of tags to copy to the FASTQ header line," - echo " if they exist." - echo " TAGLIST can be blank or \`*\` to indicate all tags should be copied to the" - echo " output. If using \`*\`," - echo " be careful to quote it to avoid unwanted shell expansion." - echo "" - echo " -1, --read1" - echo " type: file, output, file must exist" - echo " Write reads with the READ1 FLAG set (and READ2 not set) to FILE instead" - echo " of outputting them." - echo " If the -s option is used, only paired reads will be written to this" - echo " file." - echo "" - echo " -2, --read2" - echo " type: file, output, file must exist" - echo " Write reads with the READ2 FLAG set (and READ1 not set) to FILE instead" - echo " of outputting them." - echo " If the -s option is used, only paired reads will be written to this" - echo " file." - echo "" - echo " -o, --output_reads" - echo " type: file, output, file must exist" - echo " Write reads with either READ1 FLAG or READ2 flag set to FILE instead of" - echo " outputting them to stdout." - echo " This is equivalent to -1 FILE -2 FILE." - echo "" - echo " 0, --output_reads_both" - echo " type: file, output, file must exist" - echo " Write reads where the READ1 and READ2 FLAG bits set are either both set" - echo " or both unset to FILE" - echo " instead of outputting them." - echo "" - echo " -f, --filter_flags" - echo " type: integer" - echo " example: 0" - echo " Only output alignments with all bits set in INT present in the FLAG" - echo " field. INT can be specified" - echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by" - echo " beginning with '0'" - echo " (i.e. /^0[0-7]+/). Default: \`0\`." - echo "" - echo " -F, --excl_flags" - echo " type: string" - echo " example: 0x900" - echo " Do not output alignments with any bits set in INT present in the FLAG" - echo " field. INT can be specified" - echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by" - echo " beginning with '0'" - echo " (i.e. /^0[0-7]+/). This defaults to 0x900 representing filtering of" - echo " secondary and" - echo " supplementary alignments. Default: \`0x900\`." - echo "" - echo " --rf, --incl_flags" - echo " type: string" - echo " example: 0" - echo " Only output alignments with any bits set in INT present in the FLAG" - echo " field. INT can be specified" - echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/), in octal by" - echo " beginning with '0'" - echo " (i.e. /^0[0-7]+/), as a decimal number not beginning with '0' or as a" - echo " comma-separated list of" - echo " flag names. Default: \`0\`." - echo "" - echo " -G, --excl_flags_all" - echo " type: integer" - echo " example: 0" - echo " Only EXCLUDE reads with all of the bits set in INT present in the FLAG" - echo " field. INT can be specified" - echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by" - echo " beginning with '0' (i.e. /^0[0-7]+/)." - echo " Default: \`0\`." - echo "" - echo " -d, --aux_tag" - echo " type: string" - echo " Only output alignments containing an auxiliary tag matching both TAG and" - echo " VAL. If VAL is omitted" - echo " then any value is accepted. The tag types supported are i, f, Z, A and" - echo " H. \"B\" arrays are not" - echo " supported. This is comparable to the method used in samtools view --tag." - echo " The option may be specified" - echo " multiple times and is equivalent to using the --aux_tag_file option." - echo "" - echo " -D, --aux_tag_file" - echo " type: string" - echo " Only output alignments containing an auxiliary tag matching TAG and" - echo " having a value listed in FILE." - echo " The format of the file is one line per value. This is equivalent to" - echo " specifying --aux_tag multiple times." - echo "" - echo " -i, --casava" - echo " type: boolean_true" - echo " Add Illumina Casava 1.8 format entry to header, for example:" - echo " \`1:N:0:ATCACG\`." - echo "" - echo " -c, --compression" - echo " type: integer" - echo " example: 0" - echo " set compression level when writing gz or bgzf fastq files." - echo "" - echo " --i1, --index1" - echo " type: file, file must exist" - echo " write first index reads to FILE." - echo "" - echo " --i2, --index2" - echo " type: file, file must exist" - echo " write second index reads to FILE." - echo "" - echo " --barcode_tag" - echo " type: string" - echo " example: BC" - echo " Auxiliary tag to find index reads in. Default: \`BC\`." - echo "" - echo " --quality_tag" - echo " type: string" - echo " example: QT" - echo " Auxiliary tag to find index quality in. Default: \`QT\`." - echo "" - echo " --index_format" - echo " type: string" - echo " string to describe how to parse the barcode and quality tags. For" - echo " example:" - echo " * \`i14i8\`: the first 14 characters are index 1, the next 8 characters" - echo " are index 2." - echo " * \`n8i14\`: ignore the first 8 characters, and use the next 14 characters" - echo " for index 1." - echo " If the tag contains a separator, then the numeric part can be replaced" - echo " with '*' to mean" - echo " 'read until the separator or end of tag', for example: \`n*i*\`." -} # initialise variables VIASH_MODE='run' @@ -626,9 +453,9 @@ sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt LABEL org.opencontainers.image.authors="Emma Rousseau" LABEL org.opencontainers.image.description="Companion container for running component samtools samtools_fastq" -LABEL org.opencontainers.image.created="2024-12-03T10:33:57Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:37Z" LABEL org.opencontainers.image.source="https://github.com/samtools/samtools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -743,6 +570,205 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "samtools_fastq main" + echo "" + echo "Converts a SAM, BAM or CRAM to FASTQ format." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " input SAM/BAM/CRAM file" + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " output FASTQ file" + echo "" + echo "Options:" + echo " -n, --no_suffix" + echo " type: boolean_true" + echo " By default, either '/1' or '/2' is added to the end of read names where" + echo " the corresponding" + echo " READ1 or READ2 FLAG bit is set. Using -n causes read names to be left as" + echo " they are." + echo "" + echo " -N, --suffix" + echo " type: boolean_true" + echo " Always add either '/1' or '/2' to the end of read names even when put" + echo " into different files." + echo "" + echo " -O, --use_oq" + echo " type: boolean_true" + echo " Use quality values from OQ tags in preference to standard quality string" + echo " if available." + echo "" + echo " -s, --singleton" + echo " type: file, file must exist" + echo " write singleton reads to FILE." + echo "" + echo " -t, --copy_tags" + echo " type: boolean_true" + echo " Copy RG, BC and QT tags to the FASTQ header line, if they exist." + echo "" + echo " -T, --copy_tags_list" + echo " type: string" + echo " Specify a comma-separated list of tags to copy to the FASTQ header line," + echo " if they exist." + echo " TAGLIST can be blank or \`*\` to indicate all tags should be copied to the" + echo " output. If using \`*\`," + echo " be careful to quote it to avoid unwanted shell expansion." + echo "" + echo " -1, --read1" + echo " type: file, output, file must exist" + echo " Write reads with the READ1 FLAG set (and READ2 not set) to FILE instead" + echo " of outputting them." + echo " If the -s option is used, only paired reads will be written to this" + echo " file." + echo "" + echo " -2, --read2" + echo " type: file, output, file must exist" + echo " Write reads with the READ2 FLAG set (and READ1 not set) to FILE instead" + echo " of outputting them." + echo " If the -s option is used, only paired reads will be written to this" + echo " file." + echo "" + echo " -o, --output_reads" + echo " type: file, output, file must exist" + echo " Write reads with either READ1 FLAG or READ2 flag set to FILE instead of" + echo " outputting them to stdout." + echo " This is equivalent to -1 FILE -2 FILE." + echo "" + echo " 0, --output_reads_both" + echo " type: file, output, file must exist" + echo " Write reads where the READ1 and READ2 FLAG bits set are either both set" + echo " or both unset to FILE" + echo " instead of outputting them." + echo "" + echo " -f, --filter_flags" + echo " type: integer" + echo " example: 0" + echo " Only output alignments with all bits set in INT present in the FLAG" + echo " field. INT can be specified" + echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by" + echo " beginning with '0'" + echo " (i.e. /^0[0-7]+/). Default: \`0\`." + echo "" + echo " -F, --excl_flags" + echo " type: string" + echo " example: 0x900" + echo " Do not output alignments with any bits set in INT present in the FLAG" + echo " field. INT can be specified" + echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by" + echo " beginning with '0'" + echo " (i.e. /^0[0-7]+/). This defaults to 0x900 representing filtering of" + echo " secondary and" + echo " supplementary alignments. Default: \`0x900\`." + echo "" + echo " --rf, --incl_flags" + echo " type: string" + echo " example: 0" + echo " Only output alignments with any bits set in INT present in the FLAG" + echo " field. INT can be specified" + echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/), in octal by" + echo " beginning with '0'" + echo " (i.e. /^0[0-7]+/), as a decimal number not beginning with '0' or as a" + echo " comma-separated list of" + echo " flag names. Default: \`0\`." + echo "" + echo " -G, --excl_flags_all" + echo " type: integer" + echo " example: 0" + echo " Only EXCLUDE reads with all of the bits set in INT present in the FLAG" + echo " field. INT can be specified" + echo " in hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/) or in octal by" + echo " beginning with '0' (i.e. /^0[0-7]+/)." + echo " Default: \`0\`." + echo "" + echo " -d, --aux_tag" + echo " type: string" + echo " Only output alignments containing an auxiliary tag matching both TAG and" + echo " VAL. If VAL is omitted" + echo " then any value is accepted. The tag types supported are i, f, Z, A and" + echo " H. \"B\" arrays are not" + echo " supported. This is comparable to the method used in samtools view --tag." + echo " The option may be specified" + echo " multiple times and is equivalent to using the --aux_tag_file option." + echo "" + echo " -D, --aux_tag_file" + echo " type: string" + echo " Only output alignments containing an auxiliary tag matching TAG and" + echo " having a value listed in FILE." + echo " The format of the file is one line per value. This is equivalent to" + echo " specifying --aux_tag multiple times." + echo "" + echo " -i, --casava" + echo " type: boolean_true" + echo " Add Illumina Casava 1.8 format entry to header, for example:" + echo " \`1:N:0:ATCACG\`." + echo "" + echo " -c, --compression" + echo " type: integer" + echo " example: 0" + echo " set compression level when writing gz or bgzf fastq files." + echo "" + echo " --i1, --index1" + echo " type: file, file must exist" + echo " write first index reads to FILE." + echo "" + echo " --i2, --index2" + echo " type: file, file must exist" + echo " write second index reads to FILE." + echo "" + echo " --barcode_tag" + echo " type: string" + echo " example: BC" + echo " Auxiliary tag to find index reads in. Default: \`BC\`." + echo "" + echo " --quality_tag" + echo " type: string" + echo " example: QT" + echo " Auxiliary tag to find index quality in. Default: \`QT\`." + echo "" + echo " --index_format" + echo " type: string" + echo " string to describe how to parse the barcode and quality tags. For" + echo " example:" + echo " * \`i14i8\`: the first 14 characters are index 1, the next 8 characters" + echo " are index 2." + echo " * \`n8i14\`: ignore the first 8 characters, and use the next 14 characters" + echo " for index 1." + echo " If the tag contains a separator, then the numeric part can be replaced" + echo " with '*' to mean" + echo " 'read until the separator or end of tag', for example: \`n*i*\`." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/samtools/samtools_flagstat/.config.vsh.yaml b/target/executable/samtools/samtools_flagstat/.config.vsh.yaml index 532c5c1a..7be99a25 100644 --- a/target/executable/samtools/samtools_flagstat/.config.vsh.yaml +++ b/target/executable/samtools/samtools_flagstat/.config.vsh.yaml @@ -66,6 +66,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -172,16 +175,16 @@ build_info: engine: "docker|native" output: "target/executable/samtools/samtools_flagstat" executable: "target/executable/samtools/samtools_flagstat/samtools_flagstat" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/samtools/samtools_flagstat/samtools_flagstat b/target/executable/samtools/samtools_flagstat/samtools_flagstat index a9b1bfb2..67f28cab 100755 --- a/target/executable/samtools/samtools_flagstat/samtools_flagstat +++ b/target/executable/samtools/samtools_flagstat/samtools_flagstat @@ -2,7 +2,7 @@ # samtools_flagstat main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,27 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "samtools_flagstat main" - echo "" - echo "Counts the number of alignments in SAM/BAM/CRAM files for each FLAG type." - echo "" - echo "Inputs:" - echo " --bam" - echo " type: file, file must exist" - echo " BAM input files." - echo "" - echo " --bai" - echo " type: file, file must exist" - echo " BAM index file." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.flagstat" - echo " File containing samtools stats output." -} # initialise variables VIASH_MODE='run' @@ -474,9 +453,9 @@ sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt LABEL org.opencontainers.image.authors="Emma Rousseau" LABEL org.opencontainers.image.description="Companion container for running component samtools samtools_flagstat" -LABEL org.opencontainers.image.created="2024-12-03T10:33:58Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:39Z" LABEL org.opencontainers.image.source="https://github.com/samtools/samtools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -591,6 +570,53 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "samtools_flagstat main" + echo "" + echo "Counts the number of alignments in SAM/BAM/CRAM files for each FLAG type." + echo "" + echo "Inputs:" + echo " --bam" + echo " type: file, file must exist" + echo " BAM input files." + echo "" + echo " --bai" + echo " type: file, file must exist" + echo " BAM index file." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.flagstat" + echo " File containing samtools stats output." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/samtools/samtools_idxstats/.config.vsh.yaml b/target/executable/samtools/samtools_idxstats/.config.vsh.yaml index c6981d2d..2f8322db 100644 --- a/target/executable/samtools/samtools_idxstats/.config.vsh.yaml +++ b/target/executable/samtools/samtools_idxstats/.config.vsh.yaml @@ -75,6 +75,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -182,16 +185,16 @@ build_info: engine: "docker|native" output: "target/executable/samtools/samtools_idxstats" executable: "target/executable/samtools/samtools_idxstats/samtools_idxstats" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/samtools/samtools_idxstats/samtools_idxstats b/target/executable/samtools/samtools_idxstats/samtools_idxstats index 95962259..76df52e4 100755 --- a/target/executable/samtools/samtools_idxstats/samtools_idxstats +++ b/target/executable/samtools/samtools_idxstats/samtools_idxstats @@ -2,7 +2,7 @@ # samtools_idxstats main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,31 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "samtools_idxstats main" - echo "" - echo "Reports alignment summary statistics for a BAM file." - echo "" - echo "Inputs:" - echo " --bam" - echo " type: file, file must exist" - echo " BAM input file." - echo "" - echo " --bai" - echo " type: file, file must exist" - echo " BAM index file." - echo "" - echo " --fasta" - echo " type: file, file must exist" - echo " Reference file the CRAM was created with (optional)." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.idxstats" - echo " File containing samtools stats output in tab-delimited format." -} # initialise variables VIASH_MODE='run' @@ -478,9 +453,9 @@ sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt LABEL org.opencontainers.image.authors="Emma Rousseau" LABEL org.opencontainers.image.description="Companion container for running component samtools samtools_idxstats" -LABEL org.opencontainers.image.created="2024-12-03T10:34:00Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:39Z" LABEL org.opencontainers.image.source="https://github.com/samtools/samtools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -595,6 +570,57 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "samtools_idxstats main" + echo "" + echo "Reports alignment summary statistics for a BAM file." + echo "" + echo "Inputs:" + echo " --bam" + echo " type: file, file must exist" + echo " BAM input file." + echo "" + echo " --bai" + echo " type: file, file must exist" + echo " BAM index file." + echo "" + echo " --fasta" + echo " type: file, file must exist" + echo " Reference file the CRAM was created with (optional)." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.idxstats" + echo " File containing samtools stats output in tab-delimited format." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/samtools/samtools_index/.config.vsh.yaml b/target/executable/samtools/samtools_index/.config.vsh.yaml index 4cd45f17..40351902 100644 --- a/target/executable/samtools/samtools_index/.config.vsh.yaml +++ b/target/executable/samtools/samtools_index/.config.vsh.yaml @@ -84,6 +84,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -188,16 +191,16 @@ build_info: engine: "docker|native" output: "target/executable/samtools/samtools_index" executable: "target/executable/samtools/samtools_index/samtools_index" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/samtools/samtools_index/samtools_index b/target/executable/samtools/samtools_index/samtools_index index 946cc145..3d89c0d4 100755 --- a/target/executable/samtools/samtools_index/samtools_index +++ b/target/executable/samtools/samtools_index/samtools_index @@ -2,7 +2,7 @@ # samtools_index main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,38 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "samtools_index main" - echo "" - echo "Index SAM/BAM/CRAM files." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " Input file name" - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: out.bam.bai" - echo " Output file name" - echo "" - echo "Options:" - echo " -b, --bai" - echo " type: boolean_true" - echo " Generate BAM index" - echo "" - echo " -c, --csi" - echo " type: boolean_true" - echo " Create a CSI index for BAM files instead of the traditional BAI" - echo " index. This will be required for genomes with larger chromosome" - echo " sizes." - echo "" - echo " -m, --min_shift" - echo " type: integer" - echo " Create a CSI index, with a minimum interval size of 2^INT." -} # initialise variables VIASH_MODE='run' @@ -485,9 +453,9 @@ sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt LABEL org.opencontainers.image.authors="Emma Rousseau" LABEL org.opencontainers.image.description="Companion container for running component samtools samtools_index" -LABEL org.opencontainers.image.created="2024-12-03T10:33:59Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:38Z" LABEL org.opencontainers.image.source="https://github.com/samtools/samtools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -602,6 +570,64 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "samtools_index main" + echo "" + echo "Index SAM/BAM/CRAM files." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " Input file name" + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: out.bam.bai" + echo " Output file name" + echo "" + echo "Options:" + echo " -b, --bai" + echo " type: boolean_true" + echo " Generate BAM index" + echo "" + echo " -c, --csi" + echo " type: boolean_true" + echo " Create a CSI index for BAM files instead of the traditional BAI" + echo " index. This will be required for genomes with larger chromosome" + echo " sizes." + echo "" + echo " -m, --min_shift" + echo " type: integer" + echo " Create a CSI index, with a minimum interval size of 2^INT." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/samtools/samtools_sort/.config.vsh.yaml b/target/executable/samtools/samtools_sort/.config.vsh.yaml index 092019df..5487ae1e 100644 --- a/target/executable/samtools/samtools_sort/.config.vsh.yaml +++ b/target/executable/samtools/samtools_sort/.config.vsh.yaml @@ -227,6 +227,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -331,16 +334,16 @@ build_info: engine: "docker|native" output: "target/executable/samtools/samtools_sort" executable: "target/executable/samtools/samtools_sort/samtools_sort" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/samtools/samtools_sort/samtools_sort b/target/executable/samtools/samtools_sort/samtools_sort index 311a300f..24b4b039 100755 --- a/target/executable/samtools/samtools_sort/samtools_sort +++ b/target/executable/samtools/samtools_sort/samtools_sort @@ -2,7 +2,7 @@ # samtools_sort main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,109 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "samtools_sort main" - echo "" - echo "Sort SAM/BAM/CRAM file." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " SAM/BAM/CRAM input file." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: out.bam" - echo " Write final output to file." - echo "" - echo " -O, --output_fmt" - echo " type: string" - echo " example: BAM" - echo " Specify output format (SAM, BAM, CRAM)." - echo "" - echo " --output_fmt_option" - echo " type: string" - echo " Specify a single output file format option in the form" - echo " of OPTION or OPTION=VALUE." - echo "" - echo " --reference" - echo " type: file, file must exist" - echo " example: ref.fa" - echo " Reference sequence FASTA FILE." - echo "" - echo " --write_index" - echo " type: boolean_true" - echo " Automatically index the output files." - echo "" - echo " -T, --prefix" - echo " type: string" - echo " Write temporary files to PREFIX.nnnn.bam." - echo "" - echo " --no_PG" - echo " type: boolean_true" - echo " Do not add a PG line." - echo "" - echo " --template_coordinate" - echo " type: boolean_true" - echo " Sort by template-coordinate." - echo "" - echo " --input_fmt_option" - echo " type: string" - echo " Specify a single input file format option in the form" - echo " of OPTION or OPTION=VALUE." - echo "" - echo "Options:" - echo " -l, --compression" - echo " type: integer" - echo " default: 0" - echo " Set compression level, from 0 (uncompressed) to 9 (best)." - echo "" - echo " -u, --uncompressed" - echo " type: boolean_true" - echo " Output uncompressed data (equivalent to --compression 0)." - echo "" - echo " -M, --minimiser" - echo " type: boolean_true" - echo " Use minimiser for clustering unaligned/unplaced reads." - echo "" - echo " -R, --not_reverse" - echo " type: boolean_true" - echo " Do not use reverse strand (only compatible with --minimiser)" - echo "" - echo " -K, --kmer_size" - echo " type: integer" - echo " example: 20" - echo " Kmer size to use for minimiser." - echo "" - echo " -I, --order" - echo " type: file, file must exist" - echo " example: ref.fa" - echo " Order minimisers by their position in FILE FASTA." - echo "" - echo " -w, --window" - echo " type: integer" - echo " example: 100" - echo " Window size for minimiser INDEXING VIA --order REF.FA." - echo "" - echo " -H, --homopolymers" - echo " type: boolean_true" - echo " Squash homopolymers when computing minimiser." - echo "" - echo " -n, --natural_sort" - echo " type: boolean_true" - echo " Sort by read name (natural): cannot be used with samtools index." - echo "" - echo " -N, --ascii_sort" - echo " type: boolean_true" - echo " Sort by read name (ASCII): cannot be used with samtools index." - echo "" - echo " -t, --tag" - echo " type: string" - echo " Sort by value of TAG. Uses position as secondary index" - echo " (or read name if --natural_sort is set)." -} # initialise variables VIASH_MODE='run' @@ -556,9 +453,9 @@ sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt LABEL org.opencontainers.image.authors="Emma Rousseau" LABEL org.opencontainers.image.description="Companion container for running component samtools samtools_sort" -LABEL org.opencontainers.image.created="2024-12-03T10:33:59Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:38Z" LABEL org.opencontainers.image.source="https://github.com/samtools/samtools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -673,6 +570,135 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "samtools_sort main" + echo "" + echo "Sort SAM/BAM/CRAM file." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " SAM/BAM/CRAM input file." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: out.bam" + echo " Write final output to file." + echo "" + echo " -O, --output_fmt" + echo " type: string" + echo " example: BAM" + echo " Specify output format (SAM, BAM, CRAM)." + echo "" + echo " --output_fmt_option" + echo " type: string" + echo " Specify a single output file format option in the form" + echo " of OPTION or OPTION=VALUE." + echo "" + echo " --reference" + echo " type: file, file must exist" + echo " example: ref.fa" + echo " Reference sequence FASTA FILE." + echo "" + echo " --write_index" + echo " type: boolean_true" + echo " Automatically index the output files." + echo "" + echo " -T, --prefix" + echo " type: string" + echo " Write temporary files to PREFIX.nnnn.bam." + echo "" + echo " --no_PG" + echo " type: boolean_true" + echo " Do not add a PG line." + echo "" + echo " --template_coordinate" + echo " type: boolean_true" + echo " Sort by template-coordinate." + echo "" + echo " --input_fmt_option" + echo " type: string" + echo " Specify a single input file format option in the form" + echo " of OPTION or OPTION=VALUE." + echo "" + echo "Options:" + echo " -l, --compression" + echo " type: integer" + echo " default: 0" + echo " Set compression level, from 0 (uncompressed) to 9 (best)." + echo "" + echo " -u, --uncompressed" + echo " type: boolean_true" + echo " Output uncompressed data (equivalent to --compression 0)." + echo "" + echo " -M, --minimiser" + echo " type: boolean_true" + echo " Use minimiser for clustering unaligned/unplaced reads." + echo "" + echo " -R, --not_reverse" + echo " type: boolean_true" + echo " Do not use reverse strand (only compatible with --minimiser)" + echo "" + echo " -K, --kmer_size" + echo " type: integer" + echo " example: 20" + echo " Kmer size to use for minimiser." + echo "" + echo " -I, --order" + echo " type: file, file must exist" + echo " example: ref.fa" + echo " Order minimisers by their position in FILE FASTA." + echo "" + echo " -w, --window" + echo " type: integer" + echo " example: 100" + echo " Window size for minimiser INDEXING VIA --order REF.FA." + echo "" + echo " -H, --homopolymers" + echo " type: boolean_true" + echo " Squash homopolymers when computing minimiser." + echo "" + echo " -n, --natural_sort" + echo " type: boolean_true" + echo " Sort by read name (natural): cannot be used with samtools index." + echo "" + echo " -N, --ascii_sort" + echo " type: boolean_true" + echo " Sort by read name (ASCII): cannot be used with samtools index." + echo "" + echo " -t, --tag" + echo " type: string" + echo " Sort by value of TAG. Uses position as secondary index" + echo " (or read name if --natural_sort is set)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/samtools/samtools_stats/.config.vsh.yaml b/target/executable/samtools/samtools_stats/.config.vsh.yaml index 46bc1acb..208337de 100644 --- a/target/executable/samtools/samtools_stats/.config.vsh.yaml +++ b/target/executable/samtools/samtools_stats/.config.vsh.yaml @@ -295,6 +295,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -400,16 +403,16 @@ build_info: engine: "docker|native" output: "target/executable/samtools/samtools_stats" executable: "target/executable/samtools/samtools_stats/samtools_stats" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/samtools/samtools_stats/samtools_stats b/target/executable/samtools/samtools_stats/samtools_stats index 42ef5bae..84ba3d2c 100755 --- a/target/executable/samtools/samtools_stats/samtools_stats +++ b/target/executable/samtools/samtools_stats/samtools_stats @@ -2,7 +2,7 @@ # samtools_stats main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,128 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "samtools_stats main" - echo "" - echo "Reports alignment summary statistics for a BAM file." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " Input file." - echo "" - echo " --bai" - echo " type: file, file must exist" - echo " Index file." - echo "" - echo " --fasta" - echo " type: file, file must exist" - echo " Reference file the CRAM was created with." - echo "" - echo " -c, --coverage" - echo " type: integer, multiple values allowed" - echo " example: 1;1000;1" - echo " Coverage distribution min;max;step. Default: [1, 1000, 1]." - echo "" - echo " -d, --remove_dups" - echo " type: boolean_true" - echo " Exclude from statistics reads marked as duplicates." - echo "" - echo " -X, --customized_index_file" - echo " type: boolean_true" - echo " Use a customized index file." - echo "" - echo " -f, --required_flag" - echo " type: string" - echo " example: 0" - echo " Required flag, 0 for unset. See also \`samtools flags\`. Default: \`\"0\"\`." - echo "" - echo " -F, --filtering_flag" - echo " type: string" - echo " example: 0" - echo " Filtering flag, 0 for unset. See also \`samtools flags\`. Default: \`0\`." - echo "" - echo " --GC_depth" - echo " type: double" - echo " example: 20000.0" - echo " The size of GC-depth bins (decreasing bin size increases memory" - echo " requirement). Default: \`20000\`." - echo "" - echo " -i, --insert_size" - echo " type: integer" - echo " example: 8000" - echo " Maximum insert size. Default: \`8000\`." - echo "" - echo " -I, --id" - echo " type: string" - echo " Include only listed read group or sample name." - echo "" - echo " -l, --read_length" - echo " type: integer" - echo " example: -1" - echo " Include in the statistics only reads with the given read length." - echo " Default: \`-1\`." - echo "" - echo " -m, --most_inserts" - echo " type: double" - echo " example: 0.99" - echo " Report only the main part of inserts. Default: \`0.99\`." - echo "" - echo " -P, --split_prefix" - echo " type: string" - echo " Path or string prefix for filepaths output by --split (default is input" - echo " filename)." - echo "" - echo " -q, --trim_quality" - echo " type: integer" - echo " example: 0" - echo " The BWA trimming parameter. Default: \`0\`." - echo "" - echo " -r, --ref_seq" - echo " type: file, file must exist" - echo " Reference sequence (required for GC-depth and mismatches-per-cycle" - echo " calculation)." - echo "" - echo " -S, --split" - echo " type: string" - echo " Also write statistics to separate files split by tagged field." - echo "" - echo " -t, --target_regions" - echo " type: file, file must exist" - echo " Do stats in these regions only. Tab-delimited file chr,from,to, 1-based," - echo " inclusive." - echo "" - echo " -x, --sparse" - echo " type: boolean_true" - echo " Suppress outputting IS rows where there are no insertions." - echo "" - echo " -p, --remove_overlaps" - echo " type: boolean_true" - echo " Remove overlaps of paired-end reads from coverage and base count" - echo " computations." - echo "" - echo " -g, --cov_threshold" - echo " type: integer" - echo " example: 0" - echo " Only bases with coverage above this value will be included in the target" - echo " percentage computation. Default: \`0\`." - echo "" - echo " --input_fmt_option" - echo " type: string" - echo " Specify a single input file format option in the form of OPTION or" - echo " OPTION=VALUE." - echo "" - echo " --reference" - echo " type: file, file must exist" - echo " Reference sequence FASTA FILE." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: out.txt" - echo " Output file." -} # initialise variables VIASH_MODE='run' @@ -575,9 +453,9 @@ sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt LABEL org.opencontainers.image.authors="Emma Rousseau" LABEL org.opencontainers.image.description="Companion container for running component samtools samtools_stats" -LABEL org.opencontainers.image.created="2024-12-03T10:33:59Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:38Z" LABEL org.opencontainers.image.source="https://github.com/samtools/samtools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -692,6 +570,154 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "samtools_stats main" + echo "" + echo "Reports alignment summary statistics for a BAM file." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " Input file." + echo "" + echo " --bai" + echo " type: file, file must exist" + echo " Index file." + echo "" + echo " --fasta" + echo " type: file, file must exist" + echo " Reference file the CRAM was created with." + echo "" + echo " -c, --coverage" + echo " type: integer, multiple values allowed" + echo " example: 1;1000;1" + echo " Coverage distribution min;max;step. Default: [1, 1000, 1]." + echo "" + echo " -d, --remove_dups" + echo " type: boolean_true" + echo " Exclude from statistics reads marked as duplicates." + echo "" + echo " -X, --customized_index_file" + echo " type: boolean_true" + echo " Use a customized index file." + echo "" + echo " -f, --required_flag" + echo " type: string" + echo " example: 0" + echo " Required flag, 0 for unset. See also \`samtools flags\`. Default: \`\"0\"\`." + echo "" + echo " -F, --filtering_flag" + echo " type: string" + echo " example: 0" + echo " Filtering flag, 0 for unset. See also \`samtools flags\`. Default: \`0\`." + echo "" + echo " --GC_depth" + echo " type: double" + echo " example: 20000.0" + echo " The size of GC-depth bins (decreasing bin size increases memory" + echo " requirement). Default: \`20000\`." + echo "" + echo " -i, --insert_size" + echo " type: integer" + echo " example: 8000" + echo " Maximum insert size. Default: \`8000\`." + echo "" + echo " -I, --id" + echo " type: string" + echo " Include only listed read group or sample name." + echo "" + echo " -l, --read_length" + echo " type: integer" + echo " example: -1" + echo " Include in the statistics only reads with the given read length." + echo " Default: \`-1\`." + echo "" + echo " -m, --most_inserts" + echo " type: double" + echo " example: 0.99" + echo " Report only the main part of inserts. Default: \`0.99\`." + echo "" + echo " -P, --split_prefix" + echo " type: string" + echo " Path or string prefix for filepaths output by --split (default is input" + echo " filename)." + echo "" + echo " -q, --trim_quality" + echo " type: integer" + echo " example: 0" + echo " The BWA trimming parameter. Default: \`0\`." + echo "" + echo " -r, --ref_seq" + echo " type: file, file must exist" + echo " Reference sequence (required for GC-depth and mismatches-per-cycle" + echo " calculation)." + echo "" + echo " -S, --split" + echo " type: string" + echo " Also write statistics to separate files split by tagged field." + echo "" + echo " -t, --target_regions" + echo " type: file, file must exist" + echo " Do stats in these regions only. Tab-delimited file chr,from,to, 1-based," + echo " inclusive." + echo "" + echo " -x, --sparse" + echo " type: boolean_true" + echo " Suppress outputting IS rows where there are no insertions." + echo "" + echo " -p, --remove_overlaps" + echo " type: boolean_true" + echo " Remove overlaps of paired-end reads from coverage and base count" + echo " computations." + echo "" + echo " -g, --cov_threshold" + echo " type: integer" + echo " example: 0" + echo " Only bases with coverage above this value will be included in the target" + echo " percentage computation. Default: \`0\`." + echo "" + echo " --input_fmt_option" + echo " type: string" + echo " Specify a single input file format option in the form of OPTION or" + echo " OPTION=VALUE." + echo "" + echo " --reference" + echo " type: file, file must exist" + echo " Reference sequence FASTA FILE." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: out.txt" + echo " Output file." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/samtools/samtools_view/.config.vsh.yaml b/target/executable/samtools/samtools_view/.config.vsh.yaml index a8bce0fb..8cb7b649 100644 --- a/target/executable/samtools/samtools_view/.config.vsh.yaml +++ b/target/executable/samtools/samtools_view/.config.vsh.yaml @@ -559,6 +559,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -664,16 +667,16 @@ build_info: engine: "docker|native" output: "target/executable/samtools/samtools_view" executable: "target/executable/samtools/samtools_view/samtools_view" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/samtools/samtools_view/samtools_view b/target/executable/samtools/samtools_view/samtools_view index 510371d6..c13166de 100755 --- a/target/executable/samtools/samtools_view/samtools_view +++ b/target/executable/samtools/samtools_view/samtools_view @@ -2,7 +2,7 @@ # samtools_view main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,6 +172,405 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1 +ENTRYPOINT [] +RUN samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \ +sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt + +LABEL org.opencontainers.image.authors="Emma Rousseau" +LABEL org.opencontainers.image.description="Companion container for running component samtools samtools_view" +LABEL org.opencontainers.image.created="2025-03-06T09:19:38Z" +LABEL org.opencontainers.image.source="https://github.com/samtools/samtools" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "samtools_view main" @@ -543,405 +942,32 @@ function ViashHelp { echo " --write_index" echo " type: boolean_true" echo " Automatically index the output files." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? : whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1 -ENTRYPOINT [] -RUN samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \ -sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt - -LABEL org.opencontainers.image.authors="Emma Rousseau" -LABEL org.opencontainers.image.description="Companion container for running component samtools samtools_view" -LABEL org.opencontainers.image.created="2024-12-03T10:33:58Z" -LABEL org.opencontainers.image.source="https://github.com/samtools/samtools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/seqtk/seqtk_sample/.config.vsh.yaml b/target/executable/seqtk/seqtk_sample/.config.vsh.yaml index 2b9094cb..334cdbd4 100644 --- a/target/executable/seqtk/seqtk_sample/.config.vsh.yaml +++ b/target/executable/seqtk/seqtk_sample/.config.vsh.yaml @@ -80,6 +80,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -172,16 +175,16 @@ build_info: engine: "docker|native" output: "target/executable/seqtk/seqtk_sample" executable: "target/executable/seqtk/seqtk_sample/seqtk_sample" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/seqtk/seqtk_sample/seqtk_sample b/target/executable/seqtk/seqtk_sample/seqtk_sample index d478abc7..84f4eeb3 100755 --- a/target/executable/seqtk/seqtk_sample/seqtk_sample +++ b/target/executable/seqtk/seqtk_sample/seqtk_sample @@ -2,7 +2,7 @@ # seqtk_sample main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,37 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "seqtk_sample main" - echo "" - echo "Subsamples sequences from FASTA/Q files." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " The input FASTA/Q file." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " The output FASTA/Q file." - echo "" - echo "Options:" - echo " --seed" - echo " type: integer" - echo " example: 42" - echo " Seed for random generator." - echo "" - echo " --fraction_number" - echo " type: double, required parameter" - echo " example: 0.1" - echo " Fraction or number of sequences to sample." - echo "" - echo " --two_pass_mode" - echo " type: boolean_true" - echo " Twice as slow but with much reduced memory" -} # initialise variables VIASH_MODE='run' @@ -481,9 +450,9 @@ FROM quay.io/biocontainers/seqtk:1.4--he4a0461_2 ENTRYPOINT [] LABEL org.opencontainers.image.authors="Jakub Majercik" LABEL org.opencontainers.image.description="Companion container for running component seqtk seqtk_sample" -LABEL org.opencontainers.image.created="2024-12-03T10:33:56Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:45Z" LABEL org.opencontainers.image.source="https://github.com/lh3/seqtk/tree/v1.4" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -598,6 +567,63 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "seqtk_sample main" + echo "" + echo "Subsamples sequences from FASTA/Q files." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " The input FASTA/Q file." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " The output FASTA/Q file." + echo "" + echo "Options:" + echo " --seed" + echo " type: integer" + echo " example: 42" + echo " Seed for random generator." + echo "" + echo " --fraction_number" + echo " type: double, required parameter" + echo " example: 0.1" + echo " Fraction or number of sequences to sample." + echo "" + echo " --two_pass_mode" + echo " type: boolean_true" + echo " Twice as slow but with much reduced memory" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/seqtk/seqtk_subseq/.config.vsh.yaml b/target/executable/seqtk/seqtk_subseq/.config.vsh.yaml index e1a1a416..cbf68a9c 100644 --- a/target/executable/seqtk/seqtk_subseq/.config.vsh.yaml +++ b/target/executable/seqtk/seqtk_subseq/.config.vsh.yaml @@ -99,6 +99,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -195,16 +198,16 @@ build_info: engine: "docker|native" output: "target/executable/seqtk/seqtk_subseq" executable: "target/executable/seqtk/seqtk_subseq/seqtk_subseq" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/seqtk/seqtk_subseq/seqtk_subseq b/target/executable/seqtk/seqtk_subseq/seqtk_subseq index 2fd70a48..61feebcb 100755 --- a/target/executable/seqtk/seqtk_subseq/seqtk_subseq +++ b/target/executable/seqtk/seqtk_subseq/seqtk_subseq @@ -2,7 +2,7 @@ # seqtk_subseq main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,45 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "seqtk_subseq main" - echo "" - echo "Extract subsequences from FASTA/Q files. Takes as input a FASTA/Q file and a" - echo "name.lst (sequence ids file) or a reg.bed (genomic regions file)." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: input.fa" - echo " The input FASTA/Q file." - echo "" - echo " --name_list" - echo " type: file, required parameter, file must exist" - echo " example: list.lst" - echo " List of sequence names (name.lst) or genomic regions (reg.bed) to" - echo " extract." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " default: output.fa" - echo " The output FASTA/Q file." - echo "" - echo "Options:" - echo " -t, --tab" - echo " type: boolean_true" - echo " TAB delimited output." - echo "" - echo " -s, --strand_aware" - echo " type: boolean_true" - echo " Strand aware." - echo "" - echo " -l, --sequence_line_length" - echo " type: integer" - echo " example: 0" - echo " Sequence line length of input fasta file. Default: 0." -} # initialise variables VIASH_MODE='run' @@ -491,9 +452,9 @@ RUN echo $(echo $(seqtk 2>&1) | sed -n 's/.*\(Version: [^ ]*\).*/\1/p') > /var/s LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo" LABEL org.opencontainers.image.description="Companion container for running component seqtk seqtk_subseq" -LABEL org.opencontainers.image.created="2024-12-03T10:33:56Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:45Z" LABEL org.opencontainers.image.source="https://github.com/lh3/seqtk/tree/v1.4" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -608,6 +569,71 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "seqtk_subseq main" + echo "" + echo "Extract subsequences from FASTA/Q files. Takes as input a FASTA/Q file and a" + echo "name.lst (sequence ids file) or a reg.bed (genomic regions file)." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: input.fa" + echo " The input FASTA/Q file." + echo "" + echo " --name_list" + echo " type: file, required parameter, file must exist" + echo " example: list.lst" + echo " List of sequence names (name.lst) or genomic regions (reg.bed) to" + echo " extract." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " default: output.fa" + echo " The output FASTA/Q file." + echo "" + echo "Options:" + echo " -t, --tab" + echo " type: boolean_true" + echo " TAB delimited output." + echo "" + echo " -s, --strand_aware" + echo " type: boolean_true" + echo " Strand aware." + echo "" + echo " -l, --sequence_line_length" + echo " type: integer" + echo " example: 0" + echo " Sequence line length of input fasta file. Default: 0." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/sgdemux/.config.vsh.yaml b/target/executable/sgdemux/.config.vsh.yaml index 6519f639..8d567451 100644 --- a/target/executable/sgdemux/.config.vsh.yaml +++ b/target/executable/sgdemux/.config.vsh.yaml @@ -328,6 +328,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -428,16 +431,16 @@ build_info: engine: "docker|native" output: "target/executable/sgdemux" executable: "target/executable/sgdemux/sgdemux" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/sgdemux/sgdemux b/target/executable/sgdemux/sgdemux index f0605c73..e959903b 100755 --- a/target/executable/sgdemux/sgdemux +++ b/target/executable/sgdemux/sgdemux @@ -2,7 +2,7 @@ # sgdemux main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,180 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "sgdemux main" - echo "" - echo "Demultiplex sequence data generated on Singular Genomics' sequencing" - echo "instruments." - echo "" - echo "Input:" - echo " -f, --fastqs" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: sample1_r1.fq;sample1_r2.fq;sample2_r1.fq;sample2_r2.fq" - echo " Path to the input FASTQs, or path prefix if not a file" - echo "" - echo " -s, --sample_metadata" - echo " type: file, required parameter, file must exist" - echo " Path to the sample metadata CSV file including sample names and barcode" - echo " sequences" - echo "" - echo "Output:" - echo " --sample_fastq" - echo " type: file, required parameter, output, file must exist" - echo " example: output" - echo " The directory containing demultiplexed sample FASTQ files." - echo "" - echo " --metrics" - echo " type: file, output, file must exist" - echo " example: metrics.tsv" - echo " Demultiplexing summary statisitcs:" - echo " - control_reads_omitted: The number of reads that were omitted for" - echo " being control reads." - echo " - failing_reads_omitted: The number of reads that were omitted for" - echo " having failed QC." - echo " - total_templates: The total number of template reads that were" - echo " output." - echo "" - echo " --most_frequent_unmatched" - echo " type: file, output, file must exist" - echo " example: most_frequent_unmatched.tsv" - echo " It contains the (approximate) counts of the most prevelant observed" - echo " barcode sequences" - echo " that did not match to one of the expected barcodes. Can only be created" - echo " when 'most_unmatched_to_output'" - echo " is not set to 0." - echo "" - echo " --sample_barcode_hop_metrics" - echo " type: file, output, file must exist" - echo " example: sample_barcode_hop_metrics.tsv" - echo " File containing the frequently observed barcodes that are unexpected" - echo " combinations of expected barcodes in a dual-indexed run." - echo "" - echo " --per_project_metrics" - echo " type: file, output, file must exist" - echo " example: per_project_metrics.tsv" - echo " Aggregates the metrics by project (aggregates the metrics across samples" - echo " with the same project) and" - echo " has the same columns as \`--metrics\`. In this case, sample_ID will" - echo " contain the project name (or None if no project is given)." - echo " THe barcode will contain all Ns. The undetermined sample will not be" - echo " aggregated with any other sample." - echo "" - echo " --per_sample_metrics" - echo " type: file, output, file must exist" - echo " example: per_sample_metrics.tsv" - echo " Tab-separated file containing statistics per sample." - echo "" - echo "Arguments:" - echo " -r, --read_structures" - echo " type: string, multiple values allowed" - echo " Read structures, one per input FASTQ. Do not provide when using a path" - echo " prefix for FASTQs" - echo "" - echo " -m, --allowed_mismatches" - echo " type: integer" - echo " example: 1" - echo " Number of allowed mismatches between the observed barcode and the" - echo " expected barcode" - echo "" - echo " -d, --min_delta" - echo " type: integer" - echo " example: 2" - echo " The minimum allowed difference between an observed barcode and the" - echo " second closest expected barcode" - echo "" - echo " -F, --free_ns" - echo " type: integer" - echo " example: 1" - echo " Number of N's to allow in a barcode without counting against the" - echo " allowed_mismatches" - echo "" - echo " -N, --max_no_calls" - echo " type: integer" - echo " Max no-calls (N's) in a barcode before it is considered unmatchable." - echo " A barcode with total N's greater than 'max_no_call' will be considered" - echo " unmatchable." - echo "" - echo " -M, --quality_mask_threshold" - echo " type: integer, multiple values allowed" - echo " Mask template bases with quality scores less than specified value(s)." - echo " Sample barcode/index and UMI bases are never masked. If provided either" - echo " a single value," - echo " or one value per FASTQ must be provided." - echo "" - echo " -C, --filter_control_reads" - echo " type: boolean_true" - echo " Filter out control reads" - echo "" - echo " -Q, --filter_failing_quality" - echo " type: boolean_true" - echo " Filter reads failing quality filter" - echo "" - echo " -T, --output_types" - echo " type: string, multiple values allowed" - echo " example: T" - echo " choices: [ T, B, S, M ]" - echo " The types of output FASTQs to write." - echo " For each read structure, all segment types listed will be output to a" - echo " FASTQ file." - echo " These may be any of the following:" - echo " - \`T\` - Template bases" - echo " - \`B\` - Sample barcode bases" - echo " - \`M\` - Molecular barcode bases" - echo " - \`S\` - Skip bases" - echo "" - echo " -u, --undetermined_sample_name" - echo " type: string" - echo " example: Undetermined" - echo " The sample name for undetermined reads (reads that do not match an" - echo " expected barcode)" - echo "" - echo " -U, --most_unmatched_to_output" - echo " type: integer" - echo " example: 1000" - echo " Output the most frequent \"unmatched\" barcodes up to this number." - echo " If set to 0 unmatched barcodes will not be collected, improving overall" - echo " performance." - echo "" - echo " --override_matcher" - echo " type: string" - echo " choices: [ cached-hamming-distance, pre-compute ]" - echo " If the sample barcodes are > 12 bp long, a cached hamming distance" - echo " matcher is used." - echo " If the barcodes are less than or equal to 12 bp long, all possible" - echo " matches are precomputed." - echo " This option allows for overriding that heuristic." - echo "" - echo " --skip_read_name_check" - echo " type: boolean_true" - echo " If this is true, then all the read names across FASTQs will not be" - echo " enforced to be the same." - echo " This may be useful when the read names are known to be the same and" - echo " performance matters." - echo " Regardless, the first read name in each FASTQ will always be checked." - echo "" - echo " --sample_barcode_in_fastq_header" - echo " type: boolean_true" - echo " If this is true, then the sample barcode is expected to be in the FASTQ" - echo " read header." - echo " For dual indexed data, the barcodes must be \`+\` (plus) delimited." - echo " Additionally, if true," - echo " then neither index FASTQ files nor sample barcode segments in the read" - echo " structure may be specified." - echo "" - echo " --metric_prefix" - echo " type: string" - echo " Prepend this prefix to all output metric file names" - echo "" - echo " -l, --lane" - echo " type: integer, multiple values allowed" - echo " Select a subset of lanes to demultiplex. Will cause only samples and" - echo " input FASTQs with" - echo " the given \`Lane\`(s) to be demultiplexed. Samples without a lane will be" - echo " ignored, and" - echo " FASTQs without lane information will be ignored" -} # initialise variables VIASH_MODE='run' @@ -631,9 +457,9 @@ echo "sgdemux: $(sgdemux --version | cut -d' ' -f2)" > /var/software_versions.tx LABEL org.opencontainers.image.authors="Dries Schaumont" LABEL org.opencontainers.image.description="Companion container for running component sgdemux" -LABEL org.opencontainers.image.created="2024-12-03T10:34:00Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:45Z" LABEL org.opencontainers.image.source="https://github.com/Singular-Genomics/singular-demux" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -748,6 +574,206 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "sgdemux main" + echo "" + echo "Demultiplex sequence data generated on Singular Genomics' sequencing" + echo "instruments." + echo "" + echo "Input:" + echo " -f, --fastqs" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example: sample1_r1.fq;sample1_r2.fq;sample2_r1.fq;sample2_r2.fq" + echo " Path to the input FASTQs, or path prefix if not a file" + echo "" + echo " -s, --sample_metadata" + echo " type: file, required parameter, file must exist" + echo " Path to the sample metadata CSV file including sample names and barcode" + echo " sequences" + echo "" + echo "Output:" + echo " --sample_fastq" + echo " type: file, required parameter, output, file must exist" + echo " example: output" + echo " The directory containing demultiplexed sample FASTQ files." + echo "" + echo " --metrics" + echo " type: file, output, file must exist" + echo " example: metrics.tsv" + echo " Demultiplexing summary statisitcs:" + echo " - control_reads_omitted: The number of reads that were omitted for" + echo " being control reads." + echo " - failing_reads_omitted: The number of reads that were omitted for" + echo " having failed QC." + echo " - total_templates: The total number of template reads that were" + echo " output." + echo "" + echo " --most_frequent_unmatched" + echo " type: file, output, file must exist" + echo " example: most_frequent_unmatched.tsv" + echo " It contains the (approximate) counts of the most prevelant observed" + echo " barcode sequences" + echo " that did not match to one of the expected barcodes. Can only be created" + echo " when 'most_unmatched_to_output'" + echo " is not set to 0." + echo "" + echo " --sample_barcode_hop_metrics" + echo " type: file, output, file must exist" + echo " example: sample_barcode_hop_metrics.tsv" + echo " File containing the frequently observed barcodes that are unexpected" + echo " combinations of expected barcodes in a dual-indexed run." + echo "" + echo " --per_project_metrics" + echo " type: file, output, file must exist" + echo " example: per_project_metrics.tsv" + echo " Aggregates the metrics by project (aggregates the metrics across samples" + echo " with the same project) and" + echo " has the same columns as \`--metrics\`. In this case, sample_ID will" + echo " contain the project name (or None if no project is given)." + echo " THe barcode will contain all Ns. The undetermined sample will not be" + echo " aggregated with any other sample." + echo "" + echo " --per_sample_metrics" + echo " type: file, output, file must exist" + echo " example: per_sample_metrics.tsv" + echo " Tab-separated file containing statistics per sample." + echo "" + echo "Arguments:" + echo " -r, --read_structures" + echo " type: string, multiple values allowed" + echo " Read structures, one per input FASTQ. Do not provide when using a path" + echo " prefix for FASTQs" + echo "" + echo " -m, --allowed_mismatches" + echo " type: integer" + echo " example: 1" + echo " Number of allowed mismatches between the observed barcode and the" + echo " expected barcode" + echo "" + echo " -d, --min_delta" + echo " type: integer" + echo " example: 2" + echo " The minimum allowed difference between an observed barcode and the" + echo " second closest expected barcode" + echo "" + echo " -F, --free_ns" + echo " type: integer" + echo " example: 1" + echo " Number of N's to allow in a barcode without counting against the" + echo " allowed_mismatches" + echo "" + echo " -N, --max_no_calls" + echo " type: integer" + echo " Max no-calls (N's) in a barcode before it is considered unmatchable." + echo " A barcode with total N's greater than 'max_no_call' will be considered" + echo " unmatchable." + echo "" + echo " -M, --quality_mask_threshold" + echo " type: integer, multiple values allowed" + echo " Mask template bases with quality scores less than specified value(s)." + echo " Sample barcode/index and UMI bases are never masked. If provided either" + echo " a single value," + echo " or one value per FASTQ must be provided." + echo "" + echo " -C, --filter_control_reads" + echo " type: boolean_true" + echo " Filter out control reads" + echo "" + echo " -Q, --filter_failing_quality" + echo " type: boolean_true" + echo " Filter reads failing quality filter" + echo "" + echo " -T, --output_types" + echo " type: string, multiple values allowed" + echo " example: T" + echo " choices: [ T, B, S, M ]" + echo " The types of output FASTQs to write." + echo " For each read structure, all segment types listed will be output to a" + echo " FASTQ file." + echo " These may be any of the following:" + echo " - \`T\` - Template bases" + echo " - \`B\` - Sample barcode bases" + echo " - \`M\` - Molecular barcode bases" + echo " - \`S\` - Skip bases" + echo "" + echo " -u, --undetermined_sample_name" + echo " type: string" + echo " example: Undetermined" + echo " The sample name for undetermined reads (reads that do not match an" + echo " expected barcode)" + echo "" + echo " -U, --most_unmatched_to_output" + echo " type: integer" + echo " example: 1000" + echo " Output the most frequent \"unmatched\" barcodes up to this number." + echo " If set to 0 unmatched barcodes will not be collected, improving overall" + echo " performance." + echo "" + echo " --override_matcher" + echo " type: string" + echo " choices: [ cached-hamming-distance, pre-compute ]" + echo " If the sample barcodes are > 12 bp long, a cached hamming distance" + echo " matcher is used." + echo " If the barcodes are less than or equal to 12 bp long, all possible" + echo " matches are precomputed." + echo " This option allows for overriding that heuristic." + echo "" + echo " --skip_read_name_check" + echo " type: boolean_true" + echo " If this is true, then all the read names across FASTQs will not be" + echo " enforced to be the same." + echo " This may be useful when the read names are known to be the same and" + echo " performance matters." + echo " Regardless, the first read name in each FASTQ will always be checked." + echo "" + echo " --sample_barcode_in_fastq_header" + echo " type: boolean_true" + echo " If this is true, then the sample barcode is expected to be in the FASTQ" + echo " read header." + echo " For dual indexed data, the barcodes must be \`+\` (plus) delimited." + echo " Additionally, if true," + echo " then neither index FASTQ files nor sample barcode segments in the read" + echo " structure may be specified." + echo "" + echo " --metric_prefix" + echo " type: string" + echo " Prepend this prefix to all output metric file names" + echo "" + echo " -l, --lane" + echo " type: integer, multiple values allowed" + echo " Select a subset of lanes to demultiplex. Will cause only samples and" + echo " input FASTQs with" + echo " the given \`Lane\`(s) to be demultiplexed. Samples without a lane will be" + echo " ignored, and" + echo " FASTQs without lane information will be ignored" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/snpeff/.config.vsh.yaml b/target/executable/snpeff/.config.vsh.yaml index f2e67d56..6d014463 100644 --- a/target/executable/snpeff/.config.vsh.yaml +++ b/target/executable/snpeff/.config.vsh.yaml @@ -523,6 +523,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -627,16 +630,16 @@ build_info: engine: "docker|native" output: "target/executable/snpeff" executable: "target/executable/snpeff/snpeff" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/snpeff/snpeff b/target/executable/snpeff/snpeff index d03aed8d..4c58c84d 100755 --- a/target/executable/snpeff/snpeff +++ b/target/executable/snpeff/snpeff @@ -2,7 +2,7 @@ # snpeff main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,6 +169,405 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM quay.io/staphb/snpeff:5.2a +ENTRYPOINT [] +RUN version=$(snpEff -version) && \ +version_trimmed=$(echo "$version" | awk '{print $1, $2}') && \ +echo "$version_trimmed" > /var/software_versions.txt + +LABEL org.opencontainers.image.description="Companion container for running component snpeff" +LABEL org.opencontainers.image.created="2025-03-06T09:19:42Z" +LABEL org.opencontainers.image.source="https://github.com/pcingola/SnpEff" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "snpeff main" @@ -474,405 +873,32 @@ function ViashHelp { echo " --upDownStreamLen, --ud" echo " type: integer" echo " Set upstream downstream interval length (in bases)." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? : whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM quay.io/staphb/snpeff:5.2a -ENTRYPOINT [] -RUN version=$(snpEff -version) && \ -version_trimmed=$(echo "$version" | awk '{print $1, $2}') && \ -echo "$version_trimmed" > /var/software_versions.txt - -LABEL org.opencontainers.image.description="Companion container for running component snpeff" -LABEL org.opencontainers.image.created="2024-12-03T10:33:55Z" -LABEL org.opencontainers.image.source="https://github.com/pcingola/SnpEff" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/sortmerna/.config.vsh.yaml b/target/executable/sortmerna/.config.vsh.yaml index 46c03ed7..eaeb0e9c 100644 --- a/target/executable/sortmerna/.config.vsh.yaml +++ b/target/executable/sortmerna/.config.vsh.yaml @@ -487,6 +487,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -593,16 +596,16 @@ build_info: engine: "docker|native" output: "target/executable/sortmerna" executable: "target/executable/sortmerna/sortmerna" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/sortmerna/sortmerna b/target/executable/sortmerna/sortmerna index 05ea8108..b4f0cc71 100755 --- a/target/executable/sortmerna/sortmerna +++ b/target/executable/sortmerna/sortmerna @@ -2,7 +2,7 @@ # sortmerna main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,6 +169,403 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0 +ENTRYPOINT [] +RUN echo SortMeRNA: `sortmerna --version | sed -n 's/.*version \([0-9]\+\.[0-9]\+\.[0-9]\+\).*/\1/p'` + +LABEL org.opencontainers.image.description="Companion container for running component sortmerna" +LABEL org.opencontainers.image.created="2025-03-06T09:19:43Z" +LABEL org.opencontainers.image.source="https://github.com/sortmerna/sortmerna" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "sortmerna main" @@ -460,403 +857,32 @@ function ViashHelp { echo " example: 1000" echo " Maximum number of positions to store for each unique L-mer. Set to 0 to" echo " store all positions. Default: '1000'" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? : whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0 -ENTRYPOINT [] -RUN echo SortMeRNA: `sortmerna --version | sed -n 's/.*version \([0-9]\+\.[0-9]\+\.[0-9]\+\).*/\1/p'` - -LABEL org.opencontainers.image.description="Companion container for running component sortmerna" -LABEL org.opencontainers.image.created="2024-12-03T10:33:57Z" -LABEL org.opencontainers.image.source="https://github.com/sortmerna/sortmerna" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/star/star_align_reads/.config.vsh.yaml b/target/executable/star/star_align_reads/.config.vsh.yaml index 3289059c..17a13890 100644 --- a/target/executable/star/star_align_reads/.config.vsh.yaml +++ b/target/executable/star/star_align_reads/.config.vsh.yaml @@ -2540,6 +2540,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -2662,16 +2665,16 @@ build_info: engine: "docker|native" output: "target/executable/star/star_align_reads" executable: "target/executable/star/star_align_reads/star_align_reads" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/star/star_align_reads/star_align_reads b/target/executable/star/star_align_reads/star_align_reads index c650db18..f9e3d04b 100755 --- a/target/executable/star/star_align_reads/star_align_reads +++ b/target/executable/star/star_align_reads/star_align_reads @@ -2,7 +2,7 @@ # star_align_reads main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -173,6 +173,426 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM python:3.12-slim +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps gzip bzip2 && \ + rm -rf /var/lib/apt/lists/* + +ENV STAR_VERSION 2.7.11b +ENV PACKAGES gcc g++ make wget zlib1g-dev unzip xxd +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + cd /tmp && \ + wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \ + unzip ${STAR_VERSION}.zip && \ + cd STAR-${STAR_VERSION}/source && \ + make STARstatic CXXFLAGS_SIMD=-std=c++11 && \ + cp STAR /usr/local/bin && \ + cd / && \ + rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \ + apt-get --purge autoremove -y ${PACKAGES} && \ + apt-get clean + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "pyyaml" + +RUN STAR --version | sed 's#\(.*\)#star: "\1"#' > /var/software_versions.txt + +LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component star star_align_reads" +LABEL org.opencontainers.image.created="2025-03-06T09:19:35Z" +LABEL org.opencontainers.image.source="https://github.com/alexdobin/STAR" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "star_align_reads main" @@ -1617,426 +2037,32 @@ function ViashHelp { echo " The output file containing the alignments to transcriptome in BAM" echo " formats. This file is generated when --quantMode is set to" echo " TranscriptomeSAM." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? : whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM python:3.12-slim -ENTRYPOINT [] -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps gzip bzip2 && \ - rm -rf /var/lib/apt/lists/* - -ENV STAR_VERSION 2.7.11b -ENV PACKAGES gcc g++ make wget zlib1g-dev unzip xxd -RUN apt-get update && \ - apt-get install -y --no-install-recommends ${PACKAGES} && \ - cd /tmp && \ - wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \ - unzip ${STAR_VERSION}.zip && \ - cd STAR-${STAR_VERSION}/source && \ - make STARstatic CXXFLAGS_SIMD=-std=c++11 && \ - cp STAR /usr/local/bin && \ - cd / && \ - rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \ - apt-get --purge autoremove -y ${PACKAGES} && \ - apt-get clean - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "pyyaml" - -RUN STAR --version | sed 's#\(.*\)#star: "\1"#' > /var/software_versions.txt - -LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component star star_align_reads" -LABEL org.opencontainers.image.created="2024-12-03T10:33:54Z" -LABEL org.opencontainers.image.source="https://github.com/alexdobin/STAR" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/star/star_genome_generate/.config.vsh.yaml b/target/executable/star/star_genome_generate/.config.vsh.yaml index caf1333c..13a918a9 100644 --- a/target/executable/star/star_genome_generate/.config.vsh.yaml +++ b/target/executable/star/star_genome_generate/.config.vsh.yaml @@ -221,6 +221,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -332,16 +335,16 @@ build_info: engine: "docker|native" output: "target/executable/star/star_genome_generate" executable: "target/executable/star/star_genome_generate/star_genome_generate" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/star/star_genome_generate/star_genome_generate b/target/executable/star/star_genome_generate/star_genome_generate index 6e1f62a4..fefdf299 100755 --- a/target/executable/star/star_genome_generate/star_genome_generate +++ b/target/executable/star/star_genome_generate/star_genome_generate @@ -2,7 +2,7 @@ # star_genome_generate main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,116 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "star_genome_generate main" - echo "" - echo "Create index for STAR" - echo "" - echo "Input:" - echo " --genome_fasta_files" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " Path(s) to the fasta files with the genome sequences, separated by" - echo " spaces. These files should be plain text FASTA files, they *cannot* be" - echo " zipped." - echo "" - echo " --sjdb_gtf_file" - echo " type: file, file must exist" - echo " Path to the GTF file with annotations" - echo "" - echo " --sjdb_overhang" - echo " type: integer" - echo " example: 100" - echo " Length of the donor/acceptor sequence on each side of the junctions," - echo " ideally = (mate_length - 1)" - echo "" - echo " --sjdb_gtf_chr_prefix" - echo " type: string" - echo " Prefix for chromosome names in a GTF file (e.g. 'chr' for using ENSMEBL" - echo " annotations with UCSC genomes)" - echo "" - echo " --sjdb_gtf_feature_exon" - echo " type: string" - echo " example: exon" - echo " Feature type in GTF file to be used as exons for building transcripts" - echo "" - echo " --sjdb_gtf_tag_exon_parent_transcript" - echo " type: string" - echo " example: transcript_id" - echo " GTF attribute name for parent transcript ID (default \"transcript_id\"" - echo " works for GTF files)" - echo "" - echo " --sjdb_gtf_tag_exon_parent_gene" - echo " type: string" - echo " example: gene_id" - echo " GTF attribute name for parent gene ID (default \"gene_id\" works for GTF" - echo " files)" - echo "" - echo " --sjdb_gtf_tag_exon_parent_gene_name" - echo " type: string, multiple values allowed" - echo " example: gene_name" - echo " GTF attribute name for parent gene name" - echo "" - echo " --sjdb_gtf_tag_exon_parent_gene_type" - echo " type: string, multiple values allowed" - echo " example: gene_type;gene_biotype" - echo " GTF attribute name for parent gene type" - echo "" - echo " --limit_genome_generate_ram" - echo " type: long" - echo " example: 31000000000" - echo " Maximum available RAM (bytes) for genome generation" - echo "" - echo " --genome_sa_index_nbases" - echo " type: integer" - echo " example: 14" - echo " Length (bases) of the SA pre-indexing string. Typically between 10 and" - echo " 15. Longer strings will use much more memory, but allow faster searches." - echo " For small genomes, this parameter must be scaled down to min(14," - echo " log2(GenomeLength)/2 - 1)." - echo "" - echo " --genome_chr_bin_nbits" - echo " type: integer" - echo " example: 18" - echo " Defined as log2(chrBin), where chrBin is the size of the bins for genome" - echo " storage. Each chromosome will occupy an integer number of bins. For a" - echo " genome with large number of contigs, it is recommended to scale this" - echo " parameter as min(18," - echo " log2[max(GenomeLength/NumberOfReferences,ReadLength)])." - echo "" - echo " --genome_sa_sparse_d" - echo " type: integer" - echo " example: 1" - echo " min: 0" - echo " Suffux array sparsity, i.e. distance between indices. Use bigger numbers" - echo " to decrease needed RAM at the cost of mapping speed reduction." - echo "" - echo " --genome_suffix_length_max" - echo " type: integer" - echo " example: -1" - echo " Maximum length of the suffixes, has to be longer than read length. Use" - echo " -1 for infinite length." - echo "" - echo " --genome_transform_type" - echo " type: string" - echo " example: None" - echo " Type of genome transformation" - echo " None ... no transformation" - echo " Haploid ... replace reference alleles with alternative alleles from" - echo " VCF file (e.g. consensus allele)" - echo " Diploid ... create two haplotypes for each chromosome listed in VCF" - echo " file, for genotypes 1|2, assumes perfect phasing (e.g. personal genome)" - echo "" - echo " --genome_transform_vcf" - echo " type: file, file must exist" - echo " path to VCF file for genome transformation" - echo "" - echo "Output:" - echo " --index" - echo " type: file, required parameter, output, file must exist" - echo " default: STAR_index" - echo " STAR index directory." -} # initialise variables VIASH_MODE='run' @@ -577,9 +467,9 @@ RUN STAR --version | sed 's#\(.*\)#star: "\1"#' > /var/software_versions.txt LABEL org.opencontainers.image.authors="Sai Nirmayi Yasa" LABEL org.opencontainers.image.description="Companion container for running component star star_genome_generate" -LABEL org.opencontainers.image.created="2024-12-03T10:33:55Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:36Z" LABEL org.opencontainers.image.source="https://github.com/alexdobin/STAR" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -694,6 +584,142 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "star_genome_generate main" + echo "" + echo "Create index for STAR" + echo "" + echo "Input:" + echo " --genome_fasta_files" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " Path(s) to the fasta files with the genome sequences, separated by" + echo " spaces. These files should be plain text FASTA files, they *cannot* be" + echo " zipped." + echo "" + echo " --sjdb_gtf_file" + echo " type: file, file must exist" + echo " Path to the GTF file with annotations" + echo "" + echo " --sjdb_overhang" + echo " type: integer" + echo " example: 100" + echo " Length of the donor/acceptor sequence on each side of the junctions," + echo " ideally = (mate_length - 1)" + echo "" + echo " --sjdb_gtf_chr_prefix" + echo " type: string" + echo " Prefix for chromosome names in a GTF file (e.g. 'chr' for using ENSMEBL" + echo " annotations with UCSC genomes)" + echo "" + echo " --sjdb_gtf_feature_exon" + echo " type: string" + echo " example: exon" + echo " Feature type in GTF file to be used as exons for building transcripts" + echo "" + echo " --sjdb_gtf_tag_exon_parent_transcript" + echo " type: string" + echo " example: transcript_id" + echo " GTF attribute name for parent transcript ID (default \"transcript_id\"" + echo " works for GTF files)" + echo "" + echo " --sjdb_gtf_tag_exon_parent_gene" + echo " type: string" + echo " example: gene_id" + echo " GTF attribute name for parent gene ID (default \"gene_id\" works for GTF" + echo " files)" + echo "" + echo " --sjdb_gtf_tag_exon_parent_gene_name" + echo " type: string, multiple values allowed" + echo " example: gene_name" + echo " GTF attribute name for parent gene name" + echo "" + echo " --sjdb_gtf_tag_exon_parent_gene_type" + echo " type: string, multiple values allowed" + echo " example: gene_type;gene_biotype" + echo " GTF attribute name for parent gene type" + echo "" + echo " --limit_genome_generate_ram" + echo " type: long" + echo " example: 31000000000" + echo " Maximum available RAM (bytes) for genome generation" + echo "" + echo " --genome_sa_index_nbases" + echo " type: integer" + echo " example: 14" + echo " Length (bases) of the SA pre-indexing string. Typically between 10 and" + echo " 15. Longer strings will use much more memory, but allow faster searches." + echo " For small genomes, this parameter must be scaled down to min(14," + echo " log2(GenomeLength)/2 - 1)." + echo "" + echo " --genome_chr_bin_nbits" + echo " type: integer" + echo " example: 18" + echo " Defined as log2(chrBin), where chrBin is the size of the bins for genome" + echo " storage. Each chromosome will occupy an integer number of bins. For a" + echo " genome with large number of contigs, it is recommended to scale this" + echo " parameter as min(18," + echo " log2[max(GenomeLength/NumberOfReferences,ReadLength)])." + echo "" + echo " --genome_sa_sparse_d" + echo " type: integer" + echo " example: 1" + echo " min: 0" + echo " Suffux array sparsity, i.e. distance between indices. Use bigger numbers" + echo " to decrease needed RAM at the cost of mapping speed reduction." + echo "" + echo " --genome_suffix_length_max" + echo " type: integer" + echo " example: -1" + echo " Maximum length of the suffixes, has to be longer than read length. Use" + echo " -1 for infinite length." + echo "" + echo " --genome_transform_type" + echo " type: string" + echo " example: None" + echo " Type of genome transformation" + echo " None ... no transformation" + echo " Haploid ... replace reference alleles with alternative alleles from" + echo " VCF file (e.g. consensus allele)" + echo " Diploid ... create two haplotypes for each chromosome listed in VCF" + echo " file, for genotypes 1|2, assumes perfect phasing (e.g. personal genome)" + echo "" + echo " --genome_transform_vcf" + echo " type: file, file must exist" + echo " path to VCF file for genome transformation" + echo "" + echo "Output:" + echo " --index" + echo " type: file, required parameter, output, file must exist" + echo " default: STAR_index" + echo " STAR index directory." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' @@ -1429,10 +1455,10 @@ STAR \\ \${par_genome_sa_index_nbases:+--genomeSAindexNbases "\${par_genome_sa_index_nbases}"} \\ \${par_sjdb_gtf_chr_prefix:+--sjdbGTFchrPrefix "\${par_sjdb_gtf_chr_prefix}"} \\ \${par_sjdb_gtf_feature_exon:+--sjdbGTFfeatureExon "\${par_sjdb_gtf_feature_exon}"} \\ - \${par_sjdb_gtf_tag_exon_parent_transcript:+--sjdbGTFtag_exon_parent_transcript "\${par_sjdb_gtf_tag_exon_parent_transcript}"} \\ - \${par_sjdb_gtf_tag_exon_parent_gene:+--sjdbGTFtag_exon_parent_gene "\${par_sjdb_gtf_tag_exon_parent_gene}"} \\ - \${par_sjdb_gtf_tag_exon_parent_geneName:+--sjdbGTFtag_exon_parent_geneName "\${par_sjdb_gtf_tag_exon_parent_geneName}"} \\ - \${par_sjdb_gtf_tag_exon_parent_geneType:+--sjdbGTFtag_exon_parent_geneType "\${sjdbGTFtag_exon_parent_geneType}"} \\ + \${par_sjdb_gtf_tag_exon_parent_transcript:+--sjdbGTFtagExonParentTranscript "\${par_sjdb_gtf_tag_exon_parent_transcript}"} \\ + \${par_sjdb_gtf_tag_exon_parent_gene:+--sjdbGTFtagExonParentGene "\${par_sjdb_gtf_tag_exon_parent_gene}"} \\ + \${sjdb_gtf_tag_exon_parent_gene_name:+--sjdbGTFtagExonParentGeneName "\${sjdb_gtf_tag_exon_parent_gene_name}"} \\ + \${sjdb_gtf_tag_exon_parent_gene_type:+--sjdbGTFtagExonParentGeneType "\${sjdb_gtf_tag_exon_parent_gene_type}"} \\ \${par_limit_genome_generate_ram:+--limitGenomeGenerateRAM "\${par_limit_genome_generate_ram}"} \\ \${par_genome_chr_bin_nbits:+--genomeChrBinNbits "\${par_genome_chr_bin_nbits}"} \\ \${par_genome_sa_sparse_d:+--genomeSAsparseD "\${par_genome_sa_sparse_d}"} \\ diff --git a/target/executable/trimgalore/.config.vsh.yaml b/target/executable/trimgalore/.config.vsh.yaml index 8d9beabd..d13aeff1 100644 --- a/target/executable/trimgalore/.config.vsh.yaml +++ b/target/executable/trimgalore/.config.vsh.yaml @@ -668,6 +668,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -769,16 +772,16 @@ build_info: engine: "docker|native" output: "target/executable/trimgalore" executable: "target/executable/trimgalore/trimgalore" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/trimgalore/trimgalore b/target/executable/trimgalore/trimgalore index ab00133c..6f72b107 100755 --- a/target/executable/trimgalore/trimgalore +++ b/target/executable/trimgalore/trimgalore @@ -2,7 +2,7 @@ # trimgalore main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,6 +172,404 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM quay.io/biocontainers/trim-galore:0.6.10--hdfd78af_0 +ENTRYPOINT [] +RUN echo "TrimGalore: `trim_galore --version | sed -n 's/.*version\s\+\([0-9]\+\.[0-9]\+\.[0-9]\+\).*/\1/p'`" > /var/software_versions.txt + +LABEL org.opencontainers.image.authors="Sai Nirmayi Yasa" +LABEL org.opencontainers.image.description="Companion container for running component trimgalore" +LABEL org.opencontainers.image.created="2025-03-06T09:19:36Z" +LABEL org.opencontainers.image.source="https://github.com/FelixKrueger/TrimGalore" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "trimgalore main" @@ -600,404 +998,32 @@ function ViashHelp { echo " example: unpaired_read_2.fastq" echo " Output file for unpaired read 2. Only works when 1 file (single-end) or" echo " 2 files (paired-end) are specified, but not for longer lists." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? : whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM quay.io/biocontainers/trim-galore:0.6.10--hdfd78af_0 -ENTRYPOINT [] -RUN echo "TrimGalore: `trim_galore --version | sed -n 's/.*version\s\+\([0-9]\+\.[0-9]\+\.[0-9]\+\).*/\1/p'`" > /var/software_versions.txt - -LABEL org.opencontainers.image.authors="Sai Nirmayi Yasa" -LABEL org.opencontainers.image.description="Companion container for running component trimgalore" -LABEL org.opencontainers.image.created="2024-12-03T10:33:56Z" -LABEL org.opencontainers.image.source="https://github.com/FelixKrueger/TrimGalore" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/umi_tools/umi_tools_dedup/.config.vsh.yaml b/target/executable/umi_tools/umi_tools_dedup/.config.vsh.yaml index cd74403c..2cf5622e 100644 --- a/target/executable/umi_tools/umi_tools_dedup/.config.vsh.yaml +++ b/target/executable/umi_tools/umi_tools_dedup/.config.vsh.yaml @@ -509,6 +509,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -610,16 +613,16 @@ build_info: engine: "docker|native" output: "target/executable/umi_tools/umi_tools_dedup" executable: "target/executable/umi_tools/umi_tools_dedup/umi_tools_dedup" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/umi_tools/umi_tools_dedup/umi_tools_dedup b/target/executable/umi_tools/umi_tools_dedup/umi_tools_dedup index 676e6b77..e65dd352 100755 --- a/target/executable/umi_tools/umi_tools_dedup/umi_tools_dedup +++ b/target/executable/umi_tools/umi_tools_dedup/umi_tools_dedup @@ -2,7 +2,7 @@ # umi_tools_dedup main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -172,6 +172,404 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM quay.io/biocontainers/umi_tools:1.1.5--py39hf95cd2a_1 +ENTRYPOINT [] +RUN umi_tools -v | sed 's/ version//g' > /var/software_versions.txt + +LABEL org.opencontainers.image.authors="Emma Rousseau" +LABEL org.opencontainers.image.description="Companion container for running component umi_tools umi_tools_dedup" +LABEL org.opencontainers.image.created="2025-03-06T09:19:40Z" +LABEL org.opencontainers.image.source="https://github.com/CGATOxford/UMI-tools" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "umi_tools_dedup main" @@ -489,404 +887,32 @@ function ViashHelp { echo " --timeit_header" echo " type: string" echo " Add header for timing information." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? : whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM quay.io/biocontainers/umi_tools:1.1.5--py39hf95cd2a_1 -ENTRYPOINT [] -RUN umi_tools -v | sed 's/ version//g' > /var/software_versions.txt - -LABEL org.opencontainers.image.authors="Emma Rousseau" -LABEL org.opencontainers.image.description="Companion container for running component umi_tools umi_tools_dedup" -LABEL org.opencontainers.image.created="2024-12-03T10:33:57Z" -LABEL org.opencontainers.image.source="https://github.com/CGATOxford/UMI-tools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/umi_tools/umi_tools_extract/.config.vsh.yaml b/target/executable/umi_tools/umi_tools_extract/.config.vsh.yaml index 6fc5a531..b173621c 100644 --- a/target/executable/umi_tools/umi_tools_extract/.config.vsh.yaml +++ b/target/executable/umi_tools/umi_tools_extract/.config.vsh.yaml @@ -331,6 +331,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -433,16 +436,16 @@ build_info: engine: "docker|native" output: "target/executable/umi_tools/umi_tools_extract" executable: "target/executable/umi_tools/umi_tools_extract/umi_tools_extract" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/umi_tools/umi_tools_extract/umi_tools_extract b/target/executable/umi_tools/umi_tools_extract/umi_tools_extract index cde49724..84b648a6 100755 --- a/target/executable/umi_tools/umi_tools_extract/umi_tools_extract +++ b/target/executable/umi_tools/umi_tools_extract/umi_tools_extract @@ -2,7 +2,7 @@ # umi_tools_extract main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,188 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "umi_tools_extract main" - echo "" - echo "Flexible removal of UMI sequences from fastq reads." - echo "UMIs are removed and appended to the read name. Any other barcode, for example a" - echo "library barcode," - echo "is left on the read. Can also filter reads by quality or against a whitelist." - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: sample.fastq" - echo " File containing the input data." - echo "" - echo " --read2_in" - echo " type: file, file must exist" - echo " example: sample_R2.fastq" - echo " File containing the input data for the R2 reads (if paired). If" - echo " provided, a need to be provided." - echo "" - echo " -p, --bc_pattern" - echo " type: string" - echo " The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6" - echo " nucleotides" - echo " of the read are from the UMI." - echo "" - echo " --bc_pattern2" - echo " type: string" - echo " The UMI barcode pattern to use for read 2." - echo "" - echo "Output:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " Output file for read 1." - echo "" - echo " --read2_out" - echo " type: file, output, file must exist" - echo " Output file for read 2." - echo "" - echo " --filtered_out" - echo " type: file, file must exist" - echo " Write out reads not matching regex pattern or cell barcode whitelist to" - echo " this file." - echo "" - echo " --filtered_out2" - echo " type: file, file must exist" - echo " Write out read pairs not matching regex pattern or cell barcode" - echo " whitelist to this file." - echo "" - echo "Extract Options:" - echo " --extract_method" - echo " type: string" - echo " example: string" - echo " choices: [ string, regex ]" - echo " UMI pattern to use. Default: \`string\`." - echo "" - echo " --error_correct_cell" - echo " type: boolean_true" - echo " Error correct cell barcodes to the whitelist." - echo "" - echo " --whitelist" - echo " type: file, file must exist" - echo " Whitelist of accepted cell barcodes tab-separated format, where column 1" - echo " is the whitelisted" - echo " cell barcodes and column 2 is the list (comma-separated) of other cell" - echo " barcodes which should" - echo " be corrected to the barcode in column 1. If the --error_correct_cell" - echo " option is not used, this" - echo " column will be ignored." - echo "" - echo " --blacklist" - echo " type: file, file must exist" - echo " BlackWhitelist of cell barcodes to discard." - echo "" - echo " --subset_reads" - echo " type: integer" - echo " Only parse the first N reads." - echo "" - echo " --quality_filter_threshold" - echo " type: integer" - echo " Remove reads where any UMI base quality score falls below this" - echo " threshold." - echo "" - echo " --quality_filter_mask" - echo " type: string" - echo " If a UMI base has a quality below this threshold, replace the base with" - echo " 'N'." - echo "" - echo " --quality_encoding" - echo " type: string" - echo " choices: [ phred33, phred64, solexa ]" - echo " Quality score encoding. Choose from:" - echo " * phred33 [33-77]" - echo " * phred64 [64-106]" - echo " * solexa [59-106]" - echo "" - echo " --reconcile_pairs" - echo " type: boolean_true" - echo " Allow read 2 infile to contain reads not in read 1 infile. This enables" - echo " support for upstream protocols" - echo " where read one contains cell barcodes, and the read pairs have been" - echo " filtered and corrected without regard" - echo " to the read2." - echo "" - echo " --3prime, --three_prime" - echo " type: boolean_true" - echo " By default the barcode is assumed to be on the 5' end of the read, but" - echo " use this option to sepecify that it is" - echo " on the 3' end instead. This option only works with" - echo " --extract_method=string since 3' encoding can be specified" - echo " explicitly with a regex, e.g \`.*(?P.{5})\$\`." - echo "" - echo " --ignore_read_pair_suffixes" - echo " type: boolean_true" - echo " Ignore \"/1\" and \"/2\" read name suffixes. Note that this options is" - echo " required if the suffixes are not whitespace" - echo " separated from the rest of the read name." - echo " arguments:" - echo "" - echo " --umi_separator" - echo " type: string" - echo " example: _" - echo " The character that separates the UMI in the read name. Most likely a" - echo " colon if you skipped the extraction with" - echo " UMI-tools and used other software. Default: \`_\`" - echo "" - echo " --grouping_method" - echo " type: string" - echo " example: directional" - echo " choices: [ unique, percentile, cluster, adjacency, directional ]" - echo " Method to use to determine read groups by subsuming those with similar" - echo " UMIs. All methods start by identifying" - echo " the reads with the same mapping position, but treat similar yet" - echo " nonidentical UMIs differently. Default: \`directional\`" - echo "" - echo "Common Options:" - echo " --log" - echo " type: file, output, file must exist" - echo " File with logging information." - echo "" - echo " --log2stderr" - echo " type: boolean_true" - echo " Send logging information to stderr." - echo "" - echo " --verbose" - echo " type: integer" - echo " Log level. The higher, the more output." - echo "" - echo " --error" - echo " type: file, output, file must exist" - echo " File with error information." - echo "" - echo " --temp_dir" - echo " type: string" - echo " Directory for temporary files. If not set, the bash environmental" - echo " variable TMPDIR is used." - echo "" - echo " --compresslevel" - echo " type: integer" - echo " example: 6" - echo " Level of Gzip compression to use. Default=6 matches GNU gzip rather than" - echo " python gzip default (which is 9)." - echo " Default \`6\`." - echo "" - echo " --timeit" - echo " type: file, output, file must exist" - echo " Store timing information in file." - echo "" - echo " --timeit_name" - echo " type: string" - echo " default: all" - echo " Name in timing file for this class of jobs." - echo "" - echo " --timeit_header" - echo " type: boolean_true" - echo " Add header for timing information." - echo "" - echo " --random_seed" - echo " type: integer" - echo " Random seed to initialize number generator with." -} # initialise variables VIASH_MODE='run' @@ -630,9 +448,9 @@ ENTRYPOINT [] RUN umi_tools -v | sed 's/ version//g' > /var/software_versions.txt LABEL org.opencontainers.image.description="Companion container for running component umi_tools umi_tools_extract" -LABEL org.opencontainers.image.created="2024-12-03T10:33:57Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:40Z" LABEL org.opencontainers.image.source="https://github.com/CGATOxford/UMI-tools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -747,6 +565,214 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "umi_tools_extract main" + echo "" + echo "Flexible removal of UMI sequences from fastq reads." + echo "UMIs are removed and appended to the read name. Any other barcode, for example a" + echo "library barcode," + echo "is left on the read. Can also filter reads by quality or against a whitelist." + echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: sample.fastq" + echo " File containing the input data." + echo "" + echo " --read2_in" + echo " type: file, file must exist" + echo " example: sample_R2.fastq" + echo " File containing the input data for the R2 reads (if paired). If" + echo " provided, a need to be provided." + echo "" + echo " -p, --bc_pattern" + echo " type: string" + echo " The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6" + echo " nucleotides" + echo " of the read are from the UMI." + echo "" + echo " --bc_pattern2" + echo " type: string" + echo " The UMI barcode pattern to use for read 2." + echo "" + echo "Output:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " Output file for read 1." + echo "" + echo " --read2_out" + echo " type: file, output, file must exist" + echo " Output file for read 2." + echo "" + echo " --filtered_out" + echo " type: file, file must exist" + echo " Write out reads not matching regex pattern or cell barcode whitelist to" + echo " this file." + echo "" + echo " --filtered_out2" + echo " type: file, file must exist" + echo " Write out read pairs not matching regex pattern or cell barcode" + echo " whitelist to this file." + echo "" + echo "Extract Options:" + echo " --extract_method" + echo " type: string" + echo " example: string" + echo " choices: [ string, regex ]" + echo " UMI pattern to use. Default: \`string\`." + echo "" + echo " --error_correct_cell" + echo " type: boolean_true" + echo " Error correct cell barcodes to the whitelist." + echo "" + echo " --whitelist" + echo " type: file, file must exist" + echo " Whitelist of accepted cell barcodes tab-separated format, where column 1" + echo " is the whitelisted" + echo " cell barcodes and column 2 is the list (comma-separated) of other cell" + echo " barcodes which should" + echo " be corrected to the barcode in column 1. If the --error_correct_cell" + echo " option is not used, this" + echo " column will be ignored." + echo "" + echo " --blacklist" + echo " type: file, file must exist" + echo " BlackWhitelist of cell barcodes to discard." + echo "" + echo " --subset_reads" + echo " type: integer" + echo " Only parse the first N reads." + echo "" + echo " --quality_filter_threshold" + echo " type: integer" + echo " Remove reads where any UMI base quality score falls below this" + echo " threshold." + echo "" + echo " --quality_filter_mask" + echo " type: string" + echo " If a UMI base has a quality below this threshold, replace the base with" + echo " 'N'." + echo "" + echo " --quality_encoding" + echo " type: string" + echo " choices: [ phred33, phred64, solexa ]" + echo " Quality score encoding. Choose from:" + echo " * phred33 [33-77]" + echo " * phred64 [64-106]" + echo " * solexa [59-106]" + echo "" + echo " --reconcile_pairs" + echo " type: boolean_true" + echo " Allow read 2 infile to contain reads not in read 1 infile. This enables" + echo " support for upstream protocols" + echo " where read one contains cell barcodes, and the read pairs have been" + echo " filtered and corrected without regard" + echo " to the read2." + echo "" + echo " --3prime, --three_prime" + echo " type: boolean_true" + echo " By default the barcode is assumed to be on the 5' end of the read, but" + echo " use this option to sepecify that it is" + echo " on the 3' end instead. This option only works with" + echo " --extract_method=string since 3' encoding can be specified" + echo " explicitly with a regex, e.g \`.*(?P.{5})\$\`." + echo "" + echo " --ignore_read_pair_suffixes" + echo " type: boolean_true" + echo " Ignore \"/1\" and \"/2\" read name suffixes. Note that this options is" + echo " required if the suffixes are not whitespace" + echo " separated from the rest of the read name." + echo " arguments:" + echo "" + echo " --umi_separator" + echo " type: string" + echo " example: _" + echo " The character that separates the UMI in the read name. Most likely a" + echo " colon if you skipped the extraction with" + echo " UMI-tools and used other software. Default: \`_\`" + echo "" + echo " --grouping_method" + echo " type: string" + echo " example: directional" + echo " choices: [ unique, percentile, cluster, adjacency, directional ]" + echo " Method to use to determine read groups by subsuming those with similar" + echo " UMIs. All methods start by identifying" + echo " the reads with the same mapping position, but treat similar yet" + echo " nonidentical UMIs differently. Default: \`directional\`" + echo "" + echo "Common Options:" + echo " --log" + echo " type: file, output, file must exist" + echo " File with logging information." + echo "" + echo " --log2stderr" + echo " type: boolean_true" + echo " Send logging information to stderr." + echo "" + echo " --verbose" + echo " type: integer" + echo " Log level. The higher, the more output." + echo "" + echo " --error" + echo " type: file, output, file must exist" + echo " File with error information." + echo "" + echo " --temp_dir" + echo " type: string" + echo " Directory for temporary files. If not set, the bash environmental" + echo " variable TMPDIR is used." + echo "" + echo " --compresslevel" + echo " type: integer" + echo " example: 6" + echo " Level of Gzip compression to use. Default=6 matches GNU gzip rather than" + echo " python gzip default (which is 9)." + echo " Default \`6\`." + echo "" + echo " --timeit" + echo " type: file, output, file must exist" + echo " Store timing information in file." + echo "" + echo " --timeit_name" + echo " type: string" + echo " default: all" + echo " Name in timing file for this class of jobs." + echo "" + echo " --timeit_header" + echo " type: boolean_true" + echo " Add header for timing information." + echo "" + echo " --random_seed" + echo " type: integer" + echo " Random seed to initialize number generator with." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml b/target/executable/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml index f3fa98b5..11726d20 100644 --- a/target/executable/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml +++ b/target/executable/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml @@ -153,6 +153,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -255,16 +258,16 @@ build_info: engine: "docker|native" output: "target/executable/umi_tools/umi_tools_prepareforrsem" executable: "target/executable/umi_tools/umi_tools_prepareforrsem/umi_tools_prepareforrsem" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/umi_tools/umi_tools_prepareforrsem/umi_tools_prepareforrsem b/target/executable/umi_tools/umi_tools_prepareforrsem/umi_tools_prepareforrsem index fabc416d..b89d7f03 100755 --- a/target/executable/umi_tools/umi_tools_prepareforrsem/umi_tools_prepareforrsem +++ b/target/executable/umi_tools/umi_tools_prepareforrsem/umi_tools_prepareforrsem @@ -2,7 +2,7 @@ # umi_tools_prepareforrsem main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,77 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "umi_tools_prepareforrsem main" - echo "" - echo "Make the output from umi-tools dedup or group compatible with RSEM" - echo "" - echo "Input:" - echo " -I, --stdin, --input" - echo " type: file, required parameter, file must exist" - echo " example: \$id.transcriptome.bam" - echo "" - echo "Output:" - echo " -S, --stdout, --output" - echo " type: file, output, file must exist" - echo " example: \$id.transcriptome_sorted.bam" - echo "" - echo " -L, --log" - echo " type: file, output, file must exist" - echo " File with logging information [default = stdout]." - echo "" - echo " -E, --error" - echo " type: file, output, file must exist" - echo " File with error information [default = stderr]." - echo "" - echo " --log2stderr" - echo " type: boolean_true" - echo " Send logging information to stderr [default = False]." - echo "" - echo " --temp_dir" - echo " type: string" - echo " Directory for temporary files. If not set, the bash environmental" - echo " variable" - echo " TMPDIR is used." - echo "" - echo " --compresslevel" - echo " type: integer" - echo " Level of Gzip compression to use. Default (6) matchesGNU gzip rather" - echo " than python" - echo " gzip default (which is 9)." - echo "" - echo "Options:" - echo " --tags" - echo " type: string" - echo " example: UG,BX" - echo " Comma-seperated list of tags to transfer from read1 to read2 (Default:" - echo " 'UG,BX')" - echo "" - echo " --sam" - echo " type: boolean_true" - echo " Input and output SAM rather than BAM." - echo "" - echo " --timeit" - echo " type: string" - echo " Store timeing information in file [none]." - echo "" - echo " --timeit_name" - echo " type: string" - echo " Name in timing file for this class of jobs [all]." - echo "" - echo " --timeit_header" - echo " type: boolean_true" - echo " Add header for timing information [none]." - echo "" - echo " -v, --verbose" - echo " type: integer" - echo " Loglevel [1]. The higher, the more output." - echo "" - echo " --random_seed" - echo " type: integer" - echo " Random seed to initialize number generator with [none]." -} # initialise variables VIASH_MODE='run' @@ -519,9 +448,9 @@ ENTRYPOINT [] RUN umi_tools -v | sed 's/ version//g' > /var/software_versions.txt LABEL org.opencontainers.image.description="Companion container for running component umi_tools umi_tools_prepareforrsem" -LABEL org.opencontainers.image.created="2024-12-03T10:33:58Z" +LABEL org.opencontainers.image.created="2025-03-06T09:19:40Z" LABEL org.opencontainers.image.source="https://github.com/CGATOxford/UMI-tools" -LABEL org.opencontainers.image.revision="952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" +LABEL org.opencontainers.image.revision="5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -636,6 +565,103 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "umi_tools_prepareforrsem main" + echo "" + echo "Make the output from umi-tools dedup or group compatible with RSEM" + echo "" + echo "Input:" + echo " -I, --stdin, --input" + echo " type: file, required parameter, file must exist" + echo " example: \$id.transcriptome.bam" + echo "" + echo "Output:" + echo " -S, --stdout, --output" + echo " type: file, output, file must exist" + echo " example: \$id.transcriptome_sorted.bam" + echo "" + echo " -L, --log" + echo " type: file, output, file must exist" + echo " File with logging information [default = stdout]." + echo "" + echo " -E, --error" + echo " type: file, output, file must exist" + echo " File with error information [default = stderr]." + echo "" + echo " --log2stderr" + echo " type: boolean_true" + echo " Send logging information to stderr [default = False]." + echo "" + echo " --temp_dir" + echo " type: string" + echo " Directory for temporary files. If not set, the bash environmental" + echo " variable" + echo " TMPDIR is used." + echo "" + echo " --compresslevel" + echo " type: integer" + echo " Level of Gzip compression to use. Default (6) matchesGNU gzip rather" + echo " than python" + echo " gzip default (which is 9)." + echo "" + echo "Options:" + echo " --tags" + echo " type: string" + echo " example: UG,BX" + echo " Comma-seperated list of tags to transfer from read1 to read2 (Default:" + echo " 'UG,BX')" + echo "" + echo " --sam" + echo " type: boolean_true" + echo " Input and output SAM rather than BAM." + echo "" + echo " --timeit" + echo " type: string" + echo " Store timeing information in file [none]." + echo "" + echo " --timeit_name" + echo " type: string" + echo " Name in timing file for this class of jobs [all]." + echo "" + echo " --timeit_header" + echo " type: boolean_true" + echo " Add header for timing information [none]." + echo "" + echo " -v, --verbose" + echo " type: integer" + echo " Loglevel [1]. The higher, the more output." + echo "" + echo " --random_seed" + echo " type: integer" + echo " Random seed to initialize number generator with [none]." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/nextflow/agat/agat_convert_bed2gff/.config.vsh.yaml b/target/nextflow/agat/agat_convert_bed2gff/.config.vsh.yaml index cfcb7718..2fe93e30 100644 --- a/target/nextflow/agat/agat_convert_bed2gff/.config.vsh.yaml +++ b/target/nextflow/agat/agat_convert_bed2gff/.config.vsh.yaml @@ -133,6 +133,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -234,16 +237,16 @@ build_info: engine: "docker|native" output: "target/nextflow/agat/agat_convert_bed2gff" executable: "target/nextflow/agat/agat_convert_bed2gff/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/agat/agat_convert_bed2gff/main.nf b/target/nextflow/agat/agat_convert_bed2gff/main.nf index 9d203b76..cee1ff9c 100644 --- a/target/nextflow/agat/agat_convert_bed2gff/main.nf +++ b/target/nextflow/agat/agat_convert_bed2gff/main.nf @@ -1,6 +1,6 @@ // agat_convert_bed2gff main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2969,6 +3200,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3091,16 +3326,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/agat/agat_convert_bed2gff", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/agat/agat_convert_embl2gff/.config.vsh.yaml b/target/nextflow/agat/agat_convert_embl2gff/.config.vsh.yaml index a7bd8320..64344c46 100644 --- a/target/nextflow/agat/agat_convert_embl2gff/.config.vsh.yaml +++ b/target/nextflow/agat/agat_convert_embl2gff/.config.vsh.yaml @@ -123,6 +123,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -224,16 +227,16 @@ build_info: engine: "docker|native" output: "target/nextflow/agat/agat_convert_embl2gff" executable: "target/nextflow/agat/agat_convert_embl2gff/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/agat/agat_convert_embl2gff/main.nf b/target/nextflow/agat/agat_convert_embl2gff/main.nf index 2da07938..63eaeaa9 100644 --- a/target/nextflow/agat/agat_convert_embl2gff/main.nf +++ b/target/nextflow/agat/agat_convert_embl2gff/main.nf @@ -1,6 +1,6 @@ // agat_convert_embl2gff main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2962,6 +3193,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3084,16 +3319,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/agat/agat_convert_embl2gff", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/agat/agat_convert_genscan2gff/.config.vsh.yaml b/target/nextflow/agat/agat_convert_genscan2gff/.config.vsh.yaml index 04291ad4..f1e26f96 100644 --- a/target/nextflow/agat/agat_convert_genscan2gff/.config.vsh.yaml +++ b/target/nextflow/agat/agat_convert_genscan2gff/.config.vsh.yaml @@ -127,6 +127,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -229,16 +232,16 @@ build_info: engine: "docker|native" output: "target/nextflow/agat/agat_convert_genscan2gff" executable: "target/nextflow/agat/agat_convert_genscan2gff/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/agat/agat_convert_genscan2gff/main.nf b/target/nextflow/agat/agat_convert_genscan2gff/main.nf index 14e03d8e..80f0bc0f 100644 --- a/target/nextflow/agat/agat_convert_genscan2gff/main.nf +++ b/target/nextflow/agat/agat_convert_genscan2gff/main.nf @@ -1,6 +1,6 @@ // agat_convert_genscan2gff main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2963,6 +3194,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3086,16 +3321,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/agat/agat_convert_genscan2gff", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/agat/agat_convert_mfannot2gff/.config.vsh.yaml b/target/nextflow/agat/agat_convert_mfannot2gff/.config.vsh.yaml index 975300c9..f83cf2d2 100644 --- a/target/nextflow/agat/agat_convert_mfannot2gff/.config.vsh.yaml +++ b/target/nextflow/agat/agat_convert_mfannot2gff/.config.vsh.yaml @@ -83,6 +83,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -90,7 +93,7 @@ keywords: - "gene annotations" - "GFF" - "Mfannot" -license: "GPL-3." +license: "GPL-3.0" references: doi: - "10.5281/zenodo.3552717" @@ -185,16 +188,16 @@ build_info: engine: "docker|native" output: "target/nextflow/agat/agat_convert_mfannot2gff" executable: "target/nextflow/agat/agat_convert_mfannot2gff/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/agat/agat_convert_mfannot2gff/main.nf b/target/nextflow/agat/agat_convert_mfannot2gff/main.nf index a6c8fbaa..e736efb4 100644 --- a/target/nextflow/agat/agat_convert_mfannot2gff/main.nf +++ b/target/nextflow/agat/agat_convert_mfannot2gff/main.nf @@ -1,6 +1,6 @@ // agat_convert_mfannot2gff main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2923,6 +3154,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -2933,7 +3168,7 @@ meta = [ "GFF", "Mfannot" ], - "license" : "GPL-3.", + "license" : "GPL-3.0", "references" : { "doi" : [ "10.5281/zenodo.3552717" @@ -3046,16 +3281,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/agat/agat_convert_mfannot2gff", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/agat/agat_convert_sp_gff2gtf/.config.vsh.yaml b/target/nextflow/agat/agat_convert_sp_gff2gtf/.config.vsh.yaml index a949071a..80902618 100644 --- a/target/nextflow/agat/agat_convert_sp_gff2gtf/.config.vsh.yaml +++ b/target/nextflow/agat/agat_convert_sp_gff2gtf/.config.vsh.yaml @@ -126,6 +126,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -227,16 +230,16 @@ build_info: engine: "docker|native" output: "target/nextflow/agat/agat_convert_sp_gff2gtf" executable: "target/nextflow/agat/agat_convert_sp_gff2gtf/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/agat/agat_convert_sp_gff2gtf/main.nf b/target/nextflow/agat/agat_convert_sp_gff2gtf/main.nf index 521ca40f..bd39ac2a 100644 --- a/target/nextflow/agat/agat_convert_sp_gff2gtf/main.nf +++ b/target/nextflow/agat/agat_convert_sp_gff2gtf/main.nf @@ -1,6 +1,6 @@ // agat_convert_sp_gff2gtf main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2945,6 +3176,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3067,16 +3302,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/agat/agat_convert_sp_gff2gtf", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/agat/agat_convert_sp_gff2tsv/.config.vsh.yaml b/target/nextflow/agat/agat_convert_sp_gff2tsv/.config.vsh.yaml index 7c0bfbb3..007f8b57 100644 --- a/target/nextflow/agat/agat_convert_sp_gff2tsv/.config.vsh.yaml +++ b/target/nextflow/agat/agat_convert_sp_gff2tsv/.config.vsh.yaml @@ -86,6 +86,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -187,16 +190,16 @@ build_info: engine: "docker|native" output: "target/nextflow/agat/agat_convert_sp_gff2tsv" executable: "target/nextflow/agat/agat_convert_sp_gff2tsv/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/agat/agat_convert_sp_gff2tsv/main.nf b/target/nextflow/agat/agat_convert_sp_gff2tsv/main.nf index 6aea07dd..1f08d955 100644 --- a/target/nextflow/agat/agat_convert_sp_gff2tsv/main.nf +++ b/target/nextflow/agat/agat_convert_sp_gff2tsv/main.nf @@ -1,6 +1,6 @@ // agat_convert_sp_gff2tsv main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2923,6 +3154,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3045,16 +3280,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/agat/agat_convert_sp_gff2tsv", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/agat/agat_convert_sp_gxf2gxf/.config.vsh.yaml b/target/nextflow/agat/agat_convert_sp_gxf2gxf/.config.vsh.yaml index 48929ad1..77473703 100644 --- a/target/nextflow/agat/agat_convert_sp_gxf2gxf/.config.vsh.yaml +++ b/target/nextflow/agat/agat_convert_sp_gxf2gxf/.config.vsh.yaml @@ -93,6 +93,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -194,16 +197,16 @@ build_info: engine: "docker|native" output: "target/nextflow/agat/agat_convert_sp_gxf2gxf" executable: "target/nextflow/agat/agat_convert_sp_gxf2gxf/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/agat/agat_convert_sp_gxf2gxf/main.nf b/target/nextflow/agat/agat_convert_sp_gxf2gxf/main.nf index 1ca00eef..d8ab2023 100644 --- a/target/nextflow/agat/agat_convert_sp_gxf2gxf/main.nf +++ b/target/nextflow/agat/agat_convert_sp_gxf2gxf/main.nf @@ -1,6 +1,6 @@ // agat_convert_sp_gxf2gxf main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2923,6 +3154,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3045,16 +3280,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/agat/agat_convert_sp_gxf2gxf", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/agat/agat_sp_add_introns/.config.vsh.yaml b/target/nextflow/agat/agat_sp_add_introns/.config.vsh.yaml index 826a1b06..acd15f72 100644 --- a/target/nextflow/agat/agat_sp_add_introns/.config.vsh.yaml +++ b/target/nextflow/agat/agat_sp_add_introns/.config.vsh.yaml @@ -84,6 +84,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -185,16 +188,16 @@ build_info: engine: "docker|native" output: "target/nextflow/agat/agat_sp_add_introns" executable: "target/nextflow/agat/agat_sp_add_introns/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/agat/agat_sp_add_introns/main.nf b/target/nextflow/agat/agat_sp_add_introns/main.nf index e19d0c93..a24ce412 100644 --- a/target/nextflow/agat/agat_sp_add_introns/main.nf +++ b/target/nextflow/agat/agat_sp_add_introns/main.nf @@ -1,6 +1,6 @@ // agat_sp_add_introns main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2926,6 +3157,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3048,16 +3283,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/agat/agat_sp_add_introns", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/agat/agat_sp_filter_feature_from_kill_list/.config.vsh.yaml b/target/nextflow/agat/agat_sp_filter_feature_from_kill_list/.config.vsh.yaml index 497003e9..ac047278 100644 --- a/target/nextflow/agat/agat_sp_filter_feature_from_kill_list/.config.vsh.yaml +++ b/target/nextflow/agat/agat_sp_filter_feature_from_kill_list/.config.vsh.yaml @@ -133,6 +133,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -235,16 +238,16 @@ build_info: engine: "docker|native" output: "target/nextflow/agat/agat_sp_filter_feature_from_kill_list" executable: "target/nextflow/agat/agat_sp_filter_feature_from_kill_list/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/agat/agat_sp_filter_feature_from_kill_list/main.nf b/target/nextflow/agat/agat_sp_filter_feature_from_kill_list/main.nf index dbaf11fb..118e3b70 100644 --- a/target/nextflow/agat/agat_sp_filter_feature_from_kill_list/main.nf +++ b/target/nextflow/agat/agat_sp_filter_feature_from_kill_list/main.nf @@ -1,6 +1,6 @@ // agat_sp_filter_feature_from_kill_list main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2972,6 +3203,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3095,16 +3330,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/agat/agat_sp_filter_feature_from_kill_list", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/agat/agat_sp_merge_annotations/.config.vsh.yaml b/target/nextflow/agat/agat_sp_merge_annotations/.config.vsh.yaml index b9c1b1ee..d88240d5 100644 --- a/target/nextflow/agat/agat_sp_merge_annotations/.config.vsh.yaml +++ b/target/nextflow/agat/agat_sp_merge_annotations/.config.vsh.yaml @@ -81,6 +81,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -183,16 +186,16 @@ build_info: engine: "docker|native" output: "target/nextflow/agat/agat_sp_merge_annotations" executable: "target/nextflow/agat/agat_sp_merge_annotations/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/agat/agat_sp_merge_annotations/main.nf b/target/nextflow/agat/agat_sp_merge_annotations/main.nf index 65e7dd1a..7f10e2e9 100644 --- a/target/nextflow/agat/agat_sp_merge_annotations/main.nf +++ b/target/nextflow/agat/agat_sp_merge_annotations/main.nf @@ -1,6 +1,6 @@ // agat_sp_merge_annotations main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2922,6 +3153,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3045,16 +3280,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/agat/agat_sp_merge_annotations", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/agat/agat_sp_statistics/.config.vsh.yaml b/target/nextflow/agat/agat_sp_statistics/.config.vsh.yaml index c21f1646..907c01d6 100644 --- a/target/nextflow/agat/agat_sp_statistics/.config.vsh.yaml +++ b/target/nextflow/agat/agat_sp_statistics/.config.vsh.yaml @@ -128,6 +128,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -230,16 +233,16 @@ build_info: engine: "docker|native" output: "target/nextflow/agat/agat_sp_statistics" executable: "target/nextflow/agat/agat_sp_statistics/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/agat/agat_sp_statistics/main.nf b/target/nextflow/agat/agat_sp_statistics/main.nf index 3d7cf01c..7ab53b26 100644 --- a/target/nextflow/agat/agat_sp_statistics/main.nf +++ b/target/nextflow/agat/agat_sp_statistics/main.nf @@ -1,6 +1,6 @@ // agat_sp_statistics main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2972,6 +3203,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3095,16 +3330,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/agat/agat_sp_statistics", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/agat/agat_sq_stat_basic/.config.vsh.yaml b/target/nextflow/agat/agat_sq_stat_basic/.config.vsh.yaml index b9953201..ddb97ba0 100644 --- a/target/nextflow/agat/agat_sq_stat_basic/.config.vsh.yaml +++ b/target/nextflow/agat/agat_sq_stat_basic/.config.vsh.yaml @@ -124,6 +124,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -226,16 +229,16 @@ build_info: engine: "docker|native" output: "target/nextflow/agat/agat_sq_stat_basic" executable: "target/nextflow/agat/agat_sq_stat_basic/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/agat/agat_sq_stat_basic/main.nf b/target/nextflow/agat/agat_sq_stat_basic/main.nf index 291b12db..54192b06 100644 --- a/target/nextflow/agat/agat_sq_stat_basic/main.nf +++ b/target/nextflow/agat/agat_sq_stat_basic/main.nf @@ -1,6 +1,6 @@ // agat_sq_stat_basic main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2958,6 +3189,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3081,16 +3316,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/agat/agat_sq_stat_basic", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/arriba/.config.vsh.yaml b/target/nextflow/arriba/.config.vsh.yaml index adcc572c..908a7ca3 100644 --- a/target/nextflow/arriba/.config.vsh.yaml +++ b/target/nextflow/arriba/.config.vsh.yaml @@ -603,6 +603,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: cpus: 1 commands: @@ -705,16 +708,16 @@ build_info: engine: "docker|native" output: "target/nextflow/arriba" executable: "target/nextflow/arriba/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/arriba/main.nf b/target/nextflow/arriba/main.nf index 29fd7d44..c6c0825b 100644 --- a/target/nextflow/arriba/main.nf +++ b/target/nextflow/arriba/main.nf @@ -1,6 +1,6 @@ // arriba main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3469,6 +3700,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "cpus" : 1, "commands" : [ @@ -3591,16 +3826,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/arriba", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bases2fastq/.config.vsh.yaml b/target/nextflow/bases2fastq/.config.vsh.yaml index 803a290f..77adc60d 100644 --- a/target/nextflow/bases2fastq/.config.vsh.yaml +++ b/target/nextflow/bases2fastq/.config.vsh.yaml @@ -285,6 +285,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -393,16 +396,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bases2fastq" executable: "target/nextflow/bases2fastq/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bases2fastq/main.nf b/target/nextflow/bases2fastq/main.nf index 984a44d2..3e96a16a 100644 --- a/target/nextflow/bases2fastq/main.nf +++ b/target/nextflow/bases2fastq/main.nf @@ -1,6 +1,6 @@ // bases2fastq main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3149,6 +3380,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3283,16 +3518,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bases2fastq", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml b/target/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml index d1152815..34bc672f 100644 --- a/target/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml +++ b/target/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml @@ -267,6 +267,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -367,16 +370,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bbmap/bbmap_bbsplit" executable: "target/nextflow/bbmap/bbmap_bbsplit/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bbmap/bbmap_bbsplit/main.nf b/target/nextflow/bbmap/bbmap_bbsplit/main.nf index be42fc55..5535732a 100644 --- a/target/nextflow/bbmap/bbmap_bbsplit/main.nf +++ b/target/nextflow/bbmap/bbmap_bbsplit/main.nf @@ -1,6 +1,6 @@ // bbmap_bbsplit main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3101,6 +3332,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3219,16 +3454,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bbmap/bbmap_bbsplit", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bcftools/bcftools_annotate/.config.vsh.yaml b/target/nextflow/bcftools/bcftools_annotate/.config.vsh.yaml index 0bbc3442..04d7c3bb 100644 --- a/target/nextflow/bcftools/bcftools_annotate/.config.vsh.yaml +++ b/target/nextflow/bcftools/bcftools_annotate/.config.vsh.yaml @@ -355,6 +355,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -468,16 +471,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bcftools/bcftools_annotate" executable: "target/nextflow/bcftools/bcftools_annotate/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bcftools/bcftools_annotate/main.nf b/target/nextflow/bcftools/bcftools_annotate/main.nf index ff2d7bb7..6d1af5de 100644 --- a/target/nextflow/bcftools/bcftools_annotate/main.nf +++ b/target/nextflow/bcftools/bcftools_annotate/main.nf @@ -1,6 +1,6 @@ // bcftools_annotate main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3194,6 +3425,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3334,16 +3569,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bcftools/bcftools_annotate", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bcftools/bcftools_concat/.config.vsh.yaml b/target/nextflow/bcftools/bcftools_concat/.config.vsh.yaml index 55ad2d28..54bb83e5 100644 --- a/target/nextflow/bcftools/bcftools_concat/.config.vsh.yaml +++ b/target/nextflow/bcftools/bcftools_concat/.config.vsh.yaml @@ -221,6 +221,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -334,16 +337,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bcftools/bcftools_concat" executable: "target/nextflow/bcftools/bcftools_concat/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bcftools/bcftools_concat/main.nf b/target/nextflow/bcftools/bcftools_concat/main.nf index b08c31f4..aadfd27a 100644 --- a/target/nextflow/bcftools/bcftools_concat/main.nf +++ b/target/nextflow/bcftools/bcftools_concat/main.nf @@ -1,6 +1,6 @@ // bcftools_concat main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3066,6 +3297,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3206,16 +3441,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bcftools/bcftools_concat", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bcftools/bcftools_norm/.config.vsh.yaml b/target/nextflow/bcftools/bcftools_norm/.config.vsh.yaml index 1cc23827..95813eb0 100644 --- a/target/nextflow/bcftools/bcftools_norm/.config.vsh.yaml +++ b/target/nextflow/bcftools/bcftools_norm/.config.vsh.yaml @@ -302,6 +302,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -415,16 +418,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bcftools/bcftools_norm" executable: "target/nextflow/bcftools/bcftools_norm/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bcftools/bcftools_norm/main.nf b/target/nextflow/bcftools/bcftools_norm/main.nf index b96b07bb..08de353d 100644 --- a/target/nextflow/bcftools/bcftools_norm/main.nf +++ b/target/nextflow/bcftools/bcftools_norm/main.nf @@ -1,6 +1,6 @@ // bcftools_norm main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3159,6 +3390,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3299,16 +3534,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bcftools/bcftools_norm", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bcftools/bcftools_sort/.config.vsh.yaml b/target/nextflow/bcftools/bcftools_sort/.config.vsh.yaml index 2ee5be5f..5a27a99a 100644 --- a/target/nextflow/bcftools/bcftools_sort/.config.vsh.yaml +++ b/target/nextflow/bcftools/bcftools_sort/.config.vsh.yaml @@ -76,6 +76,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -184,16 +187,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bcftools/bcftools_sort" executable: "target/nextflow/bcftools/bcftools_sort/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bcftools/bcftools_sort/main.nf b/target/nextflow/bcftools/bcftools_sort/main.nf index 7973d6d2..7bd3a01b 100644 --- a/target/nextflow/bcftools/bcftools_sort/main.nf +++ b/target/nextflow/bcftools/bcftools_sort/main.nf @@ -1,6 +1,6 @@ // bcftools_sort main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2916,6 +3147,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3047,16 +3282,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bcftools/bcftools_sort", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bcftools/bcftools_stats/.config.vsh.yaml b/target/nextflow/bcftools/bcftools_stats/.config.vsh.yaml index 79c51a92..d3c84388 100644 --- a/target/nextflow/bcftools/bcftools_stats/.config.vsh.yaml +++ b/target/nextflow/bcftools/bcftools_stats/.config.vsh.yaml @@ -344,6 +344,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -457,16 +460,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bcftools/bcftools_stats" executable: "target/nextflow/bcftools/bcftools_stats/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bcftools/bcftools_stats/main.nf b/target/nextflow/bcftools/bcftools_stats/main.nf index cd95e4a9..b08d2156 100644 --- a/target/nextflow/bcftools/bcftools_stats/main.nf +++ b/target/nextflow/bcftools/bcftools_stats/main.nf @@ -1,6 +1,6 @@ // bcftools_stats main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3196,6 +3427,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3336,16 +3571,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bcftools/bcftools_stats", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bcl_convert/.config.vsh.yaml b/target/nextflow/bcl_convert/.config.vsh.yaml index 8e3bc337..b0b7488e 100644 --- a/target/nextflow/bcl_convert/.config.vsh.yaml +++ b/target/nextflow/bcl_convert/.config.vsh.yaml @@ -290,6 +290,16 @@ argument_groups: direction: "output" multiple: false multiple_sep: ";" + - type: "boolean" + name: "--force" + description: "Allow destination directory to already exist and overwrite files.\n" + info: null + example: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" resources: - type: "bash_script" path: "script.sh" @@ -303,6 +313,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -417,16 +430,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bcl_convert" executable: "target/nextflow/bcl_convert/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bcl_convert/main.nf b/target/nextflow/bcl_convert/main.nf index fa2cffc0..3ef6bcf2 100644 --- a/target/nextflow/bcl_convert/main.nf +++ b/target/nextflow/bcl_convert/main.nf @@ -1,6 +1,6 @@ // bcl_convert main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1727,8 +1886,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1741,7 +1898,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1753,33 +1910,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3172,6 +3403,18 @@ meta = [ "direction" : "output", "multiple" : false, "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--force", + "description" : "Allow destination directory to already exist and overwrite files.\n", + "example" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" } ] } @@ -3192,6 +3435,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3328,16 +3575,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bcl_convert", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ @@ -3395,6 +3642,7 @@ $( if [ ! -z ${VIASH_PAR_BCL_SAMPLEPROJECT_SUBDIRECTORIES+x} ]; then echo "${VIA $( if [ ! -z ${VIASH_PAR_FASTQ_GZIP_COMPRESSION_LEVEL+x} ]; then echo "${VIASH_PAR_FASTQ_GZIP_COMPRESSION_LEVEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_gzip_compression_level='&'#" ; else echo "# par_fastq_gzip_compression_level="; fi ) $( if [ ! -z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) $( if [ ! -z ${VIASH_PAR_LOGS+x} ]; then echo "${VIASH_PAR_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_logs='&'#" ; else echo "# par_logs="; fi ) +$( if [ ! -z ${VIASH_PAR_FORCE+x} ]; then echo "${VIASH_PAR_FORCE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_force='&'#" ; else echo "# par_force="; fi ) $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) @@ -3419,9 +3667,13 @@ $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" set -eo pipefail +[[ "\\$par_force" == "false" ]] && unset par_force + + \\$(which bcl-convert) \\\\ --bcl-input-directory "\\$par_bcl_input_directory" \\\\ --output-directory "\\$par_output_directory" \\\\ + \\${par_force:+--force} \\\\ \\${par_sample_sheet:+ --sample-sheet "\\$par_sample_sheet"} \\\\ \\${par_run_info:+ --run-info "\\$par_run_info"} \\\\ \\${par_bcl_only_lane:+ --bcl-only-lane "\\$par_bcl_only_lane"} \\\\ diff --git a/target/nextflow/bcl_convert/nextflow_schema.json b/target/nextflow/bcl_convert/nextflow_schema.json index 2eec36de..9a5f8f8d 100644 --- a/target/nextflow/bcl_convert/nextflow_schema.json +++ b/target/nextflow/bcl_convert/nextflow_schema.json @@ -286,6 +286,16 @@ } + , + "force": { + "type": + "boolean", + "description": "Type: `boolean`, example: `true`. Allow destination directory to already exist and overwrite files", + "help_text": "Type: `boolean`, example: `true`. Allow destination directory to already exist and overwrite files.\n" + + } + + } }, diff --git a/target/nextflow/bd_rhapsody/bd_rhapsody_make_reference/.config.vsh.yaml b/target/nextflow/bd_rhapsody/bd_rhapsody_make_reference/.config.vsh.yaml index 276857a2..cd694ef5 100644 --- a/target/nextflow/bd_rhapsody/bd_rhapsody_make_reference/.config.vsh.yaml +++ b/target/nextflow/bd_rhapsody/bd_rhapsody_make_reference/.config.vsh.yaml @@ -159,6 +159,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -273,16 +276,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bd_rhapsody/bd_rhapsody_make_reference" executable: "target/nextflow/bd_rhapsody/bd_rhapsody_make_reference/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bd_rhapsody/bd_rhapsody_make_reference/main.nf b/target/nextflow/bd_rhapsody/bd_rhapsody_make_reference/main.nf index c9451404..6fc23448 100644 --- a/target/nextflow/bd_rhapsody/bd_rhapsody_make_reference/main.nf +++ b/target/nextflow/bd_rhapsody/bd_rhapsody_make_reference/main.nf @@ -1,6 +1,6 @@ // bd_rhapsody_make_reference main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1727,8 +1886,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1741,7 +1898,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1753,33 +1910,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3004,6 +3235,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3145,16 +3380,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bd_rhapsody/bd_rhapsody_make_reference", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bd_rhapsody/bd_rhapsody_sequence_analysis/.config.vsh.yaml b/target/nextflow/bd_rhapsody/bd_rhapsody_sequence_analysis/.config.vsh.yaml index 7ef82413..49f74a12 100644 --- a/target/nextflow/bd_rhapsody/bd_rhapsody_sequence_analysis/.config.vsh.yaml +++ b/target/nextflow/bd_rhapsody/bd_rhapsody_sequence_analysis/.config.vsh.yaml @@ -989,6 +989,9 @@ test_resources: path: "helpers" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -1114,16 +1117,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bd_rhapsody/bd_rhapsody_sequence_analysis" executable: "target/nextflow/bd_rhapsody/bd_rhapsody_sequence_analysis/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bd_rhapsody/bd_rhapsody_sequence_analysis/main.nf b/target/nextflow/bd_rhapsody/bd_rhapsody_sequence_analysis/main.nf index a76bbfa9..6fb0f07c 100644 --- a/target/nextflow/bd_rhapsody/bd_rhapsody_sequence_analysis/main.nf +++ b/target/nextflow/bd_rhapsody/bd_rhapsody_sequence_analysis/main.nf @@ -1,6 +1,6 @@ // bd_rhapsody_sequence_analysis main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1727,8 +1886,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1741,7 +1898,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1753,33 +1910,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -4036,6 +4267,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -4192,16 +4427,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bd_rhapsody/bd_rhapsody_sequence_analysis", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bedtools/bedtools_bamtobed/.config.vsh.yaml b/target/nextflow/bedtools/bedtools_bamtobed/.config.vsh.yaml index c2444185..32b4e9fd 100644 --- a/target/nextflow/bedtools/bedtools_bamtobed/.config.vsh.yaml +++ b/target/nextflow/bedtools/bedtools_bamtobed/.config.vsh.yaml @@ -124,6 +124,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -234,16 +237,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bedtools/bedtools_bamtobed" executable: "target/nextflow/bedtools/bedtools_bamtobed/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bedtools/bedtools_bamtobed/main.nf b/target/nextflow/bedtools/bedtools_bamtobed/main.nf index f4957ec7..d22683bd 100644 --- a/target/nextflow/bedtools/bedtools_bamtobed/main.nf +++ b/target/nextflow/bedtools/bedtools_bamtobed/main.nf @@ -1,6 +1,6 @@ // bedtools_bamtobed main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2967,6 +3198,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3100,16 +3335,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bedtools/bedtools_bamtobed", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bedtools/bedtools_bamtofastq/.config.vsh.yaml b/target/nextflow/bedtools/bedtools_bamtofastq/.config.vsh.yaml index 495971a9..22a8671a 100644 --- a/target/nextflow/bedtools/bedtools_bamtofastq/.config.vsh.yaml +++ b/target/nextflow/bedtools/bedtools_bamtofastq/.config.vsh.yaml @@ -78,6 +78,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -186,16 +189,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bedtools/bedtools_bamtofastq" executable: "target/nextflow/bedtools/bedtools_bamtofastq/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bedtools/bedtools_bamtofastq/main.nf b/target/nextflow/bedtools/bedtools_bamtofastq/main.nf index e2a2821e..96fb2bd3 100644 --- a/target/nextflow/bedtools/bedtools_bamtofastq/main.nf +++ b/target/nextflow/bedtools/bedtools_bamtofastq/main.nf @@ -1,6 +1,6 @@ // bedtools_bamtofastq main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2918,6 +3149,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3049,16 +3284,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bedtools/bedtools_bamtofastq", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bedtools/bedtools_bed12tobed6/.config.vsh.yaml b/target/nextflow/bedtools/bedtools_bed12tobed6/.config.vsh.yaml index 1d509846..119a044b 100644 --- a/target/nextflow/bedtools/bedtools_bed12tobed6/.config.vsh.yaml +++ b/target/nextflow/bedtools/bedtools_bed12tobed6/.config.vsh.yaml @@ -67,6 +67,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -175,16 +178,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bedtools/bedtools_bed12tobed6" executable: "target/nextflow/bedtools/bedtools_bed12tobed6/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bedtools/bedtools_bed12tobed6/main.nf b/target/nextflow/bedtools/bedtools_bed12tobed6/main.nf index 16019625..6b41feac 100644 --- a/target/nextflow/bedtools/bedtools_bed12tobed6/main.nf +++ b/target/nextflow/bedtools/bedtools_bed12tobed6/main.nf @@ -1,6 +1,6 @@ // bedtools_bed12tobed6 main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2903,6 +3134,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3034,16 +3269,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bedtools/bedtools_bed12tobed6", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bedtools/bedtools_bedtobam/.config.vsh.yaml b/target/nextflow/bedtools/bedtools_bedtobam/.config.vsh.yaml index a518d2a1..9f7449af 100644 --- a/target/nextflow/bedtools/bedtools_bedtobam/.config.vsh.yaml +++ b/target/nextflow/bedtools/bedtools_bedtobam/.config.vsh.yaml @@ -98,6 +98,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -213,16 +216,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bedtools/bedtools_bedtobam" executable: "target/nextflow/bedtools/bedtools_bedtobam/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bedtools/bedtools_bedtobam/main.nf b/target/nextflow/bedtools/bedtools_bedtobam/main.nf index 17adadae..81d6f526 100644 --- a/target/nextflow/bedtools/bedtools_bedtobam/main.nf +++ b/target/nextflow/bedtools/bedtools_bedtobam/main.nf @@ -1,6 +1,6 @@ // bedtools_bedtobam main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2940,6 +3171,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3082,16 +3317,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bedtools/bedtools_bedtobam", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml b/target/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml index 49a48c44..a88cc9fa 100644 --- a/target/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml +++ b/target/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml @@ -226,6 +226,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -336,16 +339,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bedtools/bedtools_genomecov" executable: "target/nextflow/bedtools/bedtools_genomecov/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bedtools/bedtools_genomecov/main.nf b/target/nextflow/bedtools/bedtools_genomecov/main.nf index ff93428f..18aabc64 100644 --- a/target/nextflow/bedtools/bedtools_genomecov/main.nf +++ b/target/nextflow/bedtools/bedtools_genomecov/main.nf @@ -1,6 +1,6 @@ // bedtools_genomecov main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3073,6 +3304,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3206,16 +3441,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bedtools/bedtools_genomecov", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bedtools/bedtools_getfasta/.config.vsh.yaml b/target/nextflow/bedtools/bedtools_getfasta/.config.vsh.yaml index dea43eb2..def30f30 100644 --- a/target/nextflow/bedtools/bedtools_getfasta/.config.vsh.yaml +++ b/target/nextflow/bedtools/bedtools_getfasta/.config.vsh.yaml @@ -123,6 +123,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -231,16 +234,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bedtools/bedtools_getfasta" executable: "target/nextflow/bedtools/bedtools_getfasta/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bedtools/bedtools_getfasta/main.nf b/target/nextflow/bedtools/bedtools_getfasta/main.nf index d7826b8f..d42c74c9 100644 --- a/target/nextflow/bedtools/bedtools_getfasta/main.nf +++ b/target/nextflow/bedtools/bedtools_getfasta/main.nf @@ -1,6 +1,6 @@ // bedtools_getfasta main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2954,6 +3185,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3085,16 +3320,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bedtools/bedtools_getfasta", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bedtools/bedtools_groupby/.config.vsh.yaml b/target/nextflow/bedtools/bedtools_groupby/.config.vsh.yaml index 1cdd65d6..06b293d8 100644 --- a/target/nextflow/bedtools/bedtools_groupby/.config.vsh.yaml +++ b/target/nextflow/bedtools/bedtools_groupby/.config.vsh.yaml @@ -165,6 +165,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -272,16 +275,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bedtools/bedtools_groupby" executable: "target/nextflow/bedtools/bedtools_groupby/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bedtools/bedtools_groupby/main.nf b/target/nextflow/bedtools/bedtools_groupby/main.nf index e473e220..40091865 100644 --- a/target/nextflow/bedtools/bedtools_groupby/main.nf +++ b/target/nextflow/bedtools/bedtools_groupby/main.nf @@ -1,6 +1,6 @@ // bedtools_groupby main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2999,6 +3230,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3129,16 +3364,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bedtools/bedtools_groupby", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bedtools/bedtools_intersect/.config.vsh.yaml b/target/nextflow/bedtools/bedtools_intersect/.config.vsh.yaml index add5f521..e5086a3e 100644 --- a/target/nextflow/bedtools/bedtools_intersect/.config.vsh.yaml +++ b/target/nextflow/bedtools/bedtools_intersect/.config.vsh.yaml @@ -301,6 +301,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -409,16 +412,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bedtools/bedtools_intersect" executable: "target/nextflow/bedtools/bedtools_intersect/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bedtools/bedtools_intersect/main.nf b/target/nextflow/bedtools/bedtools_intersect/main.nf index 06311b92..c390b898 100644 --- a/target/nextflow/bedtools/bedtools_intersect/main.nf +++ b/target/nextflow/bedtools/bedtools_intersect/main.nf @@ -1,6 +1,6 @@ // bedtools_intersect main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3150,6 +3381,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3281,16 +3516,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bedtools/bedtools_intersect", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bedtools/bedtools_links/.config.vsh.yaml b/target/nextflow/bedtools/bedtools_links/.config.vsh.yaml index 489b4461..0efcf7ad 100644 --- a/target/nextflow/bedtools/bedtools_links/.config.vsh.yaml +++ b/target/nextflow/bedtools/bedtools_links/.config.vsh.yaml @@ -100,6 +100,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -209,16 +212,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bedtools/bedtools_links" executable: "target/nextflow/bedtools/bedtools_links/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bedtools/bedtools_links/main.nf b/target/nextflow/bedtools/bedtools_links/main.nf index 7dfc9e68..225d3f62 100644 --- a/target/nextflow/bedtools/bedtools_links/main.nf +++ b/target/nextflow/bedtools/bedtools_links/main.nf @@ -1,6 +1,6 @@ // bedtools_links main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2940,6 +3171,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3072,16 +3307,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bedtools/bedtools_links", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bedtools/bedtools_merge/.config.vsh.yaml b/target/nextflow/bedtools/bedtools_merge/.config.vsh.yaml index fd43b1c2..07ca993b 100644 --- a/target/nextflow/bedtools/bedtools_merge/.config.vsh.yaml +++ b/target/nextflow/bedtools/bedtools_merge/.config.vsh.yaml @@ -174,6 +174,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -278,16 +281,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bedtools/bedtools_merge" executable: "target/nextflow/bedtools/bedtools_merge/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bedtools/bedtools_merge/main.nf b/target/nextflow/bedtools/bedtools_merge/main.nf index fc77ba2f..bbe032c6 100644 --- a/target/nextflow/bedtools/bedtools_merge/main.nf +++ b/target/nextflow/bedtools/bedtools_merge/main.nf @@ -1,6 +1,6 @@ // bedtools_merge main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3007,6 +3238,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3133,16 +3368,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bedtools/bedtools_merge", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/bedtools/bedtools_sort/.config.vsh.yaml b/target/nextflow/bedtools/bedtools_sort/.config.vsh.yaml index 66a2fdac..ef220eca 100644 --- a/target/nextflow/bedtools/bedtools_sort/.config.vsh.yaml +++ b/target/nextflow/bedtools/bedtools_sort/.config.vsh.yaml @@ -114,6 +114,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -221,16 +224,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bedtools/bedtools_sort" executable: "target/nextflow/bedtools/bedtools_sort/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bedtools/bedtools_sort/main.nf b/target/nextflow/bedtools/bedtools_sort/main.nf index ed86b7a5..b15881c5 100644 --- a/target/nextflow/bedtools/bedtools_sort/main.nf +++ b/target/nextflow/bedtools/bedtools_sort/main.nf @@ -1,6 +1,6 @@ // bedtools_sort main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2961,6 +3192,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3091,16 +3326,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bedtools/bedtools_sort", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/busco/busco_download_datasets/.config.vsh.yaml b/target/nextflow/busco/busco_download_datasets/.config.vsh.yaml index a874174e..1ad42c11 100644 --- a/target/nextflow/busco/busco_download_datasets/.config.vsh.yaml +++ b/target/nextflow/busco/busco_download_datasets/.config.vsh.yaml @@ -58,6 +58,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -157,16 +160,16 @@ build_info: engine: "docker|native" output: "target/nextflow/busco/busco_download_datasets" executable: "target/nextflow/busco/busco_download_datasets/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/busco/busco_download_datasets/main.nf b/target/nextflow/busco/busco_download_datasets/main.nf index 4c261314..cb5a23e3 100644 --- a/target/nextflow/busco/busco_download_datasets/main.nf +++ b/target/nextflow/busco/busco_download_datasets/main.nf @@ -1,6 +1,6 @@ // busco_download_datasets main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2890,6 +3121,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3010,16 +3245,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/busco/busco_download_datasets", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/busco/busco_list_datasets/.config.vsh.yaml b/target/nextflow/busco/busco_list_datasets/.config.vsh.yaml index c511dc27..0e805d82 100644 --- a/target/nextflow/busco/busco_list_datasets/.config.vsh.yaml +++ b/target/nextflow/busco/busco_list_datasets/.config.vsh.yaml @@ -45,6 +45,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -144,16 +147,16 @@ build_info: engine: "docker|native" output: "target/nextflow/busco/busco_list_datasets" executable: "target/nextflow/busco/busco_list_datasets/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/busco/busco_list_datasets/main.nf b/target/nextflow/busco/busco_list_datasets/main.nf index 6694dfc6..b19a9852 100644 --- a/target/nextflow/busco/busco_list_datasets/main.nf +++ b/target/nextflow/busco/busco_list_datasets/main.nf @@ -1,6 +1,6 @@ // busco_list_datasets main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2876,6 +3107,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -2996,16 +3231,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/busco/busco_list_datasets", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/busco/busco_run/.config.vsh.yaml b/target/nextflow/busco/busco_run/.config.vsh.yaml index 52c6719b..fbac0abd 100644 --- a/target/nextflow/busco/busco_run/.config.vsh.yaml +++ b/target/nextflow/busco/busco_run/.config.vsh.yaml @@ -322,6 +322,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -422,16 +425,16 @@ build_info: engine: "docker|native" output: "target/nextflow/busco/busco_run" executable: "target/nextflow/busco/busco_run/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/busco/busco_run/main.nf b/target/nextflow/busco/busco_run/main.nf index df9b3f54..d773ed76 100644 --- a/target/nextflow/busco/busco_run/main.nf +++ b/target/nextflow/busco/busco_run/main.nf @@ -1,6 +1,6 @@ // busco_run main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3187,6 +3418,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3308,16 +3543,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/busco/busco_run", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/cellranger/cellranger_mkref/.config.vsh.yaml b/target/nextflow/cellranger/cellranger_mkref/.config.vsh.yaml index 582288b4..2486aa64 100644 --- a/target/nextflow/cellranger/cellranger_mkref/.config.vsh.yaml +++ b/target/nextflow/cellranger/cellranger_mkref/.config.vsh.yaml @@ -75,6 +75,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -194,16 +197,16 @@ build_info: engine: "docker|native" output: "target/nextflow/cellranger/cellranger_mkref" executable: "target/nextflow/cellranger/cellranger_mkref/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/cellranger/cellranger_mkref/main.nf b/target/nextflow/cellranger/cellranger_mkref/main.nf index 50c96604..dfaefea8 100644 --- a/target/nextflow/cellranger/cellranger_mkref/main.nf +++ b/target/nextflow/cellranger/cellranger_mkref/main.nf @@ -1,6 +1,6 @@ // cellranger_mkref main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2910,6 +3141,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3058,16 +3293,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/cellranger/cellranger_mkref", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/cutadapt/.config.vsh.yaml b/target/nextflow/cutadapt/.config.vsh.yaml index 7cbe26aa..e27f3d8a 100644 --- a/target/nextflow/cutadapt/.config.vsh.yaml +++ b/target/nextflow/cutadapt/.config.vsh.yaml @@ -633,6 +633,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -739,16 +742,16 @@ build_info: engine: "docker|native" output: "target/nextflow/cutadapt" executable: "target/nextflow/cutadapt/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/cutadapt/main.nf b/target/nextflow/cutadapt/main.nf index a7f02e16..a8309d4e 100644 --- a/target/nextflow/cutadapt/main.nf +++ b/target/nextflow/cutadapt/main.nf @@ -1,6 +1,6 @@ // cutadapt main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3488,6 +3719,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3618,16 +3853,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/cutadapt", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/falco/.config.vsh.yaml b/target/nextflow/falco/.config.vsh.yaml index 1040200b..9f76591a 100644 --- a/target/nextflow/falco/.config.vsh.yaml +++ b/target/nextflow/falco/.config.vsh.yaml @@ -203,6 +203,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -316,16 +319,16 @@ build_info: engine: "docker|native" output: "target/nextflow/falco" executable: "target/nextflow/falco/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/falco/main.nf b/target/nextflow/falco/main.nf index 25db12d1..7e1b7edf 100644 --- a/target/nextflow/falco/main.nf +++ b/target/nextflow/falco/main.nf @@ -1,6 +1,6 @@ // falco main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3031,6 +3262,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3169,16 +3404,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/falco", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/fastp/.config.vsh.yaml b/target/nextflow/fastp/.config.vsh.yaml index 14c8381b..be1bef7b 100644 --- a/target/nextflow/fastp/.config.vsh.yaml +++ b/target/nextflow/fastp/.config.vsh.yaml @@ -982,6 +982,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -1082,16 +1085,16 @@ build_info: engine: "docker|native" output: "target/nextflow/fastp" executable: "target/nextflow/fastp/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/fastp/main.nf b/target/nextflow/fastp/main.nf index b3df0432..945e6b4f 100644 --- a/target/nextflow/fastp/main.nf +++ b/target/nextflow/fastp/main.nf @@ -1,6 +1,6 @@ // fastp main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3901,6 +4132,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -4022,16 +4257,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/fastp", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/fastqc/.config.vsh.yaml b/target/nextflow/fastqc/.config.vsh.yaml index fdd44163..3bffecbd 100644 --- a/target/nextflow/fastqc/.config.vsh.yaml +++ b/target/nextflow/fastqc/.config.vsh.yaml @@ -239,6 +239,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -339,16 +342,16 @@ build_info: engine: "docker|native" output: "target/nextflow/fastqc" executable: "target/nextflow/fastqc/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/fastqc/main.nf b/target/nextflow/fastqc/main.nf index 221b751c..9305e651 100644 --- a/target/nextflow/fastqc/main.nf +++ b/target/nextflow/fastqc/main.nf @@ -1,6 +1,6 @@ // fastqc main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3062,6 +3293,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3181,16 +3416,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/fastqc", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/featurecounts/.config.vsh.yaml b/target/nextflow/featurecounts/.config.vsh.yaml index f59e1f1a..51d03b41 100644 --- a/target/nextflow/featurecounts/.config.vsh.yaml +++ b/target/nextflow/featurecounts/.config.vsh.yaml @@ -543,6 +543,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -644,16 +647,16 @@ build_info: engine: "docker|native" output: "target/nextflow/featurecounts" executable: "target/nextflow/featurecounts/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/featurecounts/main.nf b/target/nextflow/featurecounts/main.nf index d9852f3d..107b2863 100644 --- a/target/nextflow/featurecounts/main.nf +++ b/target/nextflow/featurecounts/main.nf @@ -1,6 +1,6 @@ // featurecounts main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3427,6 +3658,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3548,16 +3783,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/featurecounts", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/fq_subsample/.config.vsh.yaml b/target/nextflow/fq_subsample/.config.vsh.yaml index d56f5cb9..33a112c8 100644 --- a/target/nextflow/fq_subsample/.config.vsh.yaml +++ b/target/nextflow/fq_subsample/.config.vsh.yaml @@ -88,6 +88,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -189,16 +192,16 @@ build_info: engine: "docker|native" output: "target/nextflow/fq_subsample" executable: "target/nextflow/fq_subsample/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/fq_subsample/main.nf b/target/nextflow/fq_subsample/main.nf index ae3f2e3d..ffcf6e1c 100644 --- a/target/nextflow/fq_subsample/main.nf +++ b/target/nextflow/fq_subsample/main.nf @@ -1,6 +1,6 @@ // fq_subsample main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2914,6 +3145,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3031,16 +3266,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/fq_subsample", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/gffread/.config.vsh.yaml b/target/nextflow/gffread/.config.vsh.yaml index 54dad4ba..c3dbdb94 100644 --- a/target/nextflow/gffread/.config.vsh.yaml +++ b/target/nextflow/gffread/.config.vsh.yaml @@ -582,6 +582,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -684,16 +687,16 @@ build_info: engine: "docker|native" output: "target/nextflow/gffread" executable: "target/nextflow/gffread/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/gffread/main.nf b/target/nextflow/gffread/main.nf index 0b9416be..c06d5283 100644 --- a/target/nextflow/gffread/main.nf +++ b/target/nextflow/gffread/main.nf @@ -1,6 +1,6 @@ // gffread main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3482,6 +3713,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3605,16 +3840,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/gffread", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/kallisto/kallisto_index/.config.vsh.yaml b/target/nextflow/kallisto/kallisto_index/.config.vsh.yaml index adb39ffd..2803bc9d 100644 --- a/target/nextflow/kallisto/kallisto_index/.config.vsh.yaml +++ b/target/nextflow/kallisto/kallisto_index/.config.vsh.yaml @@ -113,6 +113,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -217,16 +220,16 @@ build_info: engine: "docker|native" output: "target/nextflow/kallisto/kallisto_index" executable: "target/nextflow/kallisto/kallisto_index/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/kallisto/kallisto_index/main.nf b/target/nextflow/kallisto/kallisto_index/main.nf index c324d4a8..00835142 100644 --- a/target/nextflow/kallisto/kallisto_index/main.nf +++ b/target/nextflow/kallisto/kallisto_index/main.nf @@ -1,6 +1,6 @@ // kallisto_index main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2948,6 +3179,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3070,16 +3305,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/kallisto/kallisto_index", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/kallisto/kallisto_quant/.config.vsh.yaml b/target/nextflow/kallisto/kallisto_quant/.config.vsh.yaml index 75ae5e74..e3c16c0a 100644 --- a/target/nextflow/kallisto/kallisto_quant/.config.vsh.yaml +++ b/target/nextflow/kallisto/kallisto_quant/.config.vsh.yaml @@ -137,6 +137,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -245,16 +248,16 @@ build_info: engine: "docker|native" output: "target/nextflow/kallisto/kallisto_quant" executable: "target/nextflow/kallisto/kallisto_quant/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/kallisto/kallisto_quant/main.nf b/target/nextflow/kallisto/kallisto_quant/main.nf index d8f9d467..6bd49ee8 100644 --- a/target/nextflow/kallisto/kallisto_quant/main.nf +++ b/target/nextflow/kallisto/kallisto_quant/main.nf @@ -1,6 +1,6 @@ // kallisto_quant main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2975,6 +3206,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3104,16 +3339,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/kallisto/kallisto_quant", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/lofreq/lofreq_call/.config.vsh.yaml b/target/nextflow/lofreq/lofreq_call/.config.vsh.yaml index b87e1f30..c350bec1 100644 --- a/target/nextflow/lofreq/lofreq_call/.config.vsh.yaml +++ b/target/nextflow/lofreq/lofreq_call/.config.vsh.yaml @@ -403,6 +403,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -506,16 +509,16 @@ build_info: engine: "docker|native" output: "target/nextflow/lofreq/lofreq_call" executable: "target/nextflow/lofreq/lofreq_call/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/lofreq/lofreq_call/main.nf b/target/nextflow/lofreq/lofreq_call/main.nf index ec74457f..a116bbe5 100644 --- a/target/nextflow/lofreq/lofreq_call/main.nf +++ b/target/nextflow/lofreq/lofreq_call/main.nf @@ -1,6 +1,6 @@ // lofreq_call main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3290,6 +3521,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3413,16 +3648,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/lofreq/lofreq_call", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/lofreq/lofreq_indelqual/.config.vsh.yaml b/target/nextflow/lofreq/lofreq_indelqual/.config.vsh.yaml index c7b51039..56a45ca7 100644 --- a/target/nextflow/lofreq/lofreq_indelqual/.config.vsh.yaml +++ b/target/nextflow/lofreq/lofreq_indelqual/.config.vsh.yaml @@ -109,6 +109,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -214,16 +217,16 @@ build_info: engine: "docker|native" output: "target/nextflow/lofreq/lofreq_indelqual" executable: "target/nextflow/lofreq/lofreq_indelqual/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/lofreq/lofreq_indelqual/main.nf b/target/nextflow/lofreq/lofreq_indelqual/main.nf index 9a41f985..13d0ab4c 100644 --- a/target/nextflow/lofreq/lofreq_indelqual/main.nf +++ b/target/nextflow/lofreq/lofreq_indelqual/main.nf @@ -1,6 +1,6 @@ // lofreq_indelqual main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2951,6 +3182,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3076,16 +3311,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/lofreq/lofreq_indelqual", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/multiqc/.config.vsh.yaml b/target/nextflow/multiqc/.config.vsh.yaml index 3475e44c..9e93ba81 100644 --- a/target/nextflow/multiqc/.config.vsh.yaml +++ b/target/nextflow/multiqc/.config.vsh.yaml @@ -357,6 +357,9 @@ info: doi: "10.1093/bioinformatics/btw354" licence: "GPL v3 or later" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -455,16 +458,16 @@ build_info: engine: "docker|native" output: "target/nextflow/multiqc" executable: "target/nextflow/multiqc/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/multiqc/main.nf b/target/nextflow/multiqc/main.nf index 71ef4c3a..097b488e 100644 --- a/target/nextflow/multiqc/main.nf +++ b/target/nextflow/multiqc/main.nf @@ -1,6 +1,6 @@ // multiqc main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3246,6 +3477,10 @@ meta = [ "licence" : "GPL v3 or later" }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3365,16 +3600,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/multiqc", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/nanoplot/.config.vsh.yaml b/target/nextflow/nanoplot/.config.vsh.yaml index 6cdbe1f9..544d5411 100644 --- a/target/nextflow/nanoplot/.config.vsh.yaml +++ b/target/nextflow/nanoplot/.config.vsh.yaml @@ -390,6 +390,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -491,16 +494,16 @@ build_info: engine: "docker|native" output: "target/nextflow/nanoplot" executable: "target/nextflow/nanoplot/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/nanoplot/main.nf b/target/nextflow/nanoplot/main.nf index 844558f1..cb13563e 100644 --- a/target/nextflow/nanoplot/main.nf +++ b/target/nextflow/nanoplot/main.nf @@ -1,6 +1,6 @@ // nanoplot main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3274,6 +3505,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3396,16 +3631,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/nanoplot", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/pear/.config.vsh.yaml b/target/nextflow/pear/.config.vsh.yaml index 0eaa52da..bd4c95b9 100644 --- a/target/nextflow/pear/.config.vsh.yaml +++ b/target/nextflow/pear/.config.vsh.yaml @@ -295,6 +295,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -397,16 +400,16 @@ build_info: engine: "docker|native" output: "target/nextflow/pear" executable: "target/nextflow/pear/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/pear/main.nf b/target/nextflow/pear/main.nf index 9a7f180f..9f299948 100644 --- a/target/nextflow/pear/main.nf +++ b/target/nextflow/pear/main.nf @@ -1,6 +1,6 @@ // pear main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3136,6 +3367,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3258,16 +3493,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/pear", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml b/target/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml index b4b05536..a659a086 100644 --- a/target/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml +++ b/target/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml @@ -161,6 +161,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -263,16 +266,16 @@ build_info: engine: "docker|native" output: "target/nextflow/qualimap/qualimap_rnaseq" executable: "target/nextflow/qualimap/qualimap_rnaseq/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/qualimap/qualimap_rnaseq/main.nf b/target/nextflow/qualimap/qualimap_rnaseq/main.nf index 5acb4baa..50c27e8d 100644 --- a/target/nextflow/qualimap/qualimap_rnaseq/main.nf +++ b/target/nextflow/qualimap/qualimap_rnaseq/main.nf @@ -1,6 +1,6 @@ // qualimap_rnaseq main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3005,6 +3236,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3128,16 +3363,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/qualimap/qualimap_rnaseq", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml b/target/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml index 4e942c80..4119280c 100644 --- a/target/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml +++ b/target/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml @@ -724,6 +724,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -851,16 +854,16 @@ build_info: engine: "docker|native" output: "target/nextflow/rsem/rsem_calculate_expression" executable: "target/nextflow/rsem/rsem_calculate_expression/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/rsem/rsem_calculate_expression/main.nf b/target/nextflow/rsem/rsem_calculate_expression/main.nf index 56e7d179..142c3e1a 100644 --- a/target/nextflow/rsem/rsem_calculate_expression/main.nf +++ b/target/nextflow/rsem/rsem_calculate_expression/main.nf @@ -1,6 +1,6 @@ // rsem_calculate_expression main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3514,6 +3745,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3660,16 +3895,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/rsem/rsem_calculate_expression", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/rsem/rsem_prepare_reference/.config.vsh.yaml b/target/nextflow/rsem/rsem_prepare_reference/.config.vsh.yaml index 94266b8a..0bf22bf8 100644 --- a/target/nextflow/rsem/rsem_prepare_reference/.config.vsh.yaml +++ b/target/nextflow/rsem/rsem_prepare_reference/.config.vsh.yaml @@ -268,6 +268,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -415,16 +418,16 @@ build_info: engine: "docker|native" output: "target/nextflow/rsem/rsem_prepare_reference" executable: "target/nextflow/rsem/rsem_prepare_reference/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/rsem/rsem_prepare_reference/main.nf b/target/nextflow/rsem/rsem_prepare_reference/main.nf index e4f08ad0..d41661af 100644 --- a/target/nextflow/rsem/rsem_prepare_reference/main.nf +++ b/target/nextflow/rsem/rsem_prepare_reference/main.nf @@ -1,6 +1,6 @@ // rsem_prepare_reference main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3085,6 +3316,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3244,16 +3479,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/rsem/rsem_prepare_reference", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml index a1eb2e26..bbeee969 100644 --- a/target/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml +++ b/target/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml @@ -68,6 +68,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -174,16 +177,16 @@ build_info: engine: "docker|native" output: "target/nextflow/rseqc/rseqc_bamstat" executable: "target/nextflow/rseqc/rseqc_bamstat/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/rseqc/rseqc_bamstat/main.nf b/target/nextflow/rseqc/rseqc_bamstat/main.nf index 043175f4..744ce51a 100644 --- a/target/nextflow/rseqc/rseqc_bamstat/main.nf +++ b/target/nextflow/rseqc/rseqc_bamstat/main.nf @@ -1,6 +1,6 @@ // rseqc_bamstat main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2905,6 +3136,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3035,16 +3270,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/rseqc/rseqc_bamstat", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml index dd249054..74c12028 100644 --- a/target/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml +++ b/target/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml @@ -96,6 +96,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -200,16 +203,16 @@ build_info: engine: "docker|native" output: "target/nextflow/rseqc/rseqc_inferexperiment" executable: "target/nextflow/rseqc/rseqc_inferexperiment/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/rseqc/rseqc_inferexperiment/main.nf b/target/nextflow/rseqc/rseqc_inferexperiment/main.nf index 8c96bf65..1592c7c7 100644 --- a/target/nextflow/rseqc/rseqc_inferexperiment/main.nf +++ b/target/nextflow/rseqc/rseqc_inferexperiment/main.nf @@ -1,6 +1,6 @@ // rseqc_inferexperiment main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2942,6 +3173,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3068,16 +3303,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/rseqc/rseqc_inferexperiment", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml index 43639d09..ca1e49c5 100644 --- a/target/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml +++ b/target/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml @@ -185,6 +185,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -293,16 +296,16 @@ build_info: engine: "docker|native" output: "target/nextflow/rseqc/rseqc_inner_distance" executable: "target/nextflow/rseqc/rseqc_inner_distance/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/rseqc/rseqc_inner_distance/main.nf b/target/nextflow/rseqc/rseqc_inner_distance/main.nf index 0a0e0ec0..c75f5ca6 100644 --- a/target/nextflow/rseqc/rseqc_inner_distance/main.nf +++ b/target/nextflow/rseqc/rseqc_inner_distance/main.nf @@ -1,6 +1,6 @@ // rseqc_inner_distance main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3035,6 +3266,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3168,16 +3403,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/rseqc/rseqc_inner_distance", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/salmon/salmon_index/.config.vsh.yaml b/target/nextflow/salmon/salmon_index/.config.vsh.yaml index 29e1d6a5..c806825b 100644 --- a/target/nextflow/salmon/salmon_index/.config.vsh.yaml +++ b/target/nextflow/salmon/salmon_index/.config.vsh.yaml @@ -176,6 +176,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -276,16 +279,16 @@ build_info: engine: "docker|native" output: "target/nextflow/salmon/salmon_index" executable: "target/nextflow/salmon/salmon_index/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/salmon/salmon_index/main.nf b/target/nextflow/salmon/salmon_index/main.nf index 33a48b3a..539f673c 100644 --- a/target/nextflow/salmon/salmon_index/main.nf +++ b/target/nextflow/salmon/salmon_index/main.nf @@ -1,6 +1,6 @@ // salmon_index main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3007,6 +3238,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3128,16 +3363,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/salmon/salmon_index", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/salmon/salmon_quant/.config.vsh.yaml b/target/nextflow/salmon/salmon_quant/.config.vsh.yaml index cdcde821..90e00ae0 100644 --- a/target/nextflow/salmon/salmon_quant/.config.vsh.yaml +++ b/target/nextflow/salmon/salmon_quant/.config.vsh.yaml @@ -1072,6 +1072,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -1172,16 +1175,16 @@ build_info: engine: "docker|native" output: "target/nextflow/salmon/salmon_quant" executable: "target/nextflow/salmon/salmon_quant/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/salmon/salmon_quant/main.nf b/target/nextflow/salmon/salmon_quant/main.nf index c86bbd59..1e5235fa 100644 --- a/target/nextflow/salmon/salmon_quant/main.nf +++ b/target/nextflow/salmon/salmon_quant/main.nf @@ -1,6 +1,6 @@ // salmon_quant main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3842,6 +4073,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3963,16 +4198,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/salmon/salmon_quant", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/samtools/samtools_collate/.config.vsh.yaml b/target/nextflow/samtools/samtools_collate/.config.vsh.yaml index dd352fbd..31cb199a 100644 --- a/target/nextflow/samtools/samtools_collate/.config.vsh.yaml +++ b/target/nextflow/samtools/samtools_collate/.config.vsh.yaml @@ -158,6 +158,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -263,16 +266,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_collate" executable: "target/nextflow/samtools/samtools_collate/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/samtools/samtools_collate/main.nf b/target/nextflow/samtools/samtools_collate/main.nf index cd738bce..8435fd2a 100644 --- a/target/nextflow/samtools/samtools_collate/main.nf +++ b/target/nextflow/samtools/samtools_collate/main.nf @@ -1,6 +1,6 @@ // samtools_collate main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3014,6 +3245,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3139,16 +3374,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_collate", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/samtools/samtools_faidx/.config.vsh.yaml b/target/nextflow/samtools/samtools_faidx/.config.vsh.yaml index 028cf6e8..8cbf015a 100644 --- a/target/nextflow/samtools/samtools_faidx/.config.vsh.yaml +++ b/target/nextflow/samtools/samtools_faidx/.config.vsh.yaml @@ -139,6 +139,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -242,16 +245,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_faidx" executable: "target/nextflow/samtools/samtools_faidx/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/samtools/samtools_faidx/main.nf b/target/nextflow/samtools/samtools_faidx/main.nf index 2cb06f78..c18b22fd 100644 --- a/target/nextflow/samtools/samtools_faidx/main.nf +++ b/target/nextflow/samtools/samtools_faidx/main.nf @@ -1,6 +1,6 @@ // samtools_faidx main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2988,6 +3219,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3111,16 +3346,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_faidx", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/samtools/samtools_fasta/.config.vsh.yaml b/target/nextflow/samtools/samtools_fasta/.config.vsh.yaml index 4cbd67fd..4c3d80bb 100644 --- a/target/nextflow/samtools/samtools_fasta/.config.vsh.yaml +++ b/target/nextflow/samtools/samtools_fasta/.config.vsh.yaml @@ -328,6 +328,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -432,16 +435,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_fasta" executable: "target/nextflow/samtools/samtools_fasta/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/samtools/samtools_fasta/main.nf b/target/nextflow/samtools/samtools_fasta/main.nf index ff2f5523..1ab5de62 100644 --- a/target/nextflow/samtools/samtools_fasta/main.nf +++ b/target/nextflow/samtools/samtools_fasta/main.nf @@ -1,6 +1,6 @@ // samtools_fasta main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3179,6 +3410,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3303,16 +3538,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_fasta", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/samtools/samtools_fastq/.config.vsh.yaml b/target/nextflow/samtools/samtools_fastq/.config.vsh.yaml index 8dd1bf85..8cab0f46 100644 --- a/target/nextflow/samtools/samtools_fastq/.config.vsh.yaml +++ b/target/nextflow/samtools/samtools_fastq/.config.vsh.yaml @@ -328,6 +328,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -432,16 +435,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_fastq" executable: "target/nextflow/samtools/samtools_fastq/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/samtools/samtools_fastq/main.nf b/target/nextflow/samtools/samtools_fastq/main.nf index 03d73ad1..b61561a3 100644 --- a/target/nextflow/samtools/samtools_fastq/main.nf +++ b/target/nextflow/samtools/samtools_fastq/main.nf @@ -1,6 +1,6 @@ // samtools_fastq main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3179,6 +3410,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3303,16 +3538,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_fastq", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/samtools/samtools_flagstat/.config.vsh.yaml b/target/nextflow/samtools/samtools_flagstat/.config.vsh.yaml index 16adec46..d1cb5a55 100644 --- a/target/nextflow/samtools/samtools_flagstat/.config.vsh.yaml +++ b/target/nextflow/samtools/samtools_flagstat/.config.vsh.yaml @@ -66,6 +66,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -172,16 +175,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_flagstat" executable: "target/nextflow/samtools/samtools_flagstat/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/samtools/samtools_flagstat/main.nf b/target/nextflow/samtools/samtools_flagstat/main.nf index 31df84f1..b54a8b9c 100644 --- a/target/nextflow/samtools/samtools_flagstat/main.nf +++ b/target/nextflow/samtools/samtools_flagstat/main.nf @@ -1,6 +1,6 @@ // samtools_flagstat main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2901,6 +3132,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3027,16 +3262,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_flagstat", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/samtools/samtools_idxstats/.config.vsh.yaml b/target/nextflow/samtools/samtools_idxstats/.config.vsh.yaml index ef273e92..b1857a39 100644 --- a/target/nextflow/samtools/samtools_idxstats/.config.vsh.yaml +++ b/target/nextflow/samtools/samtools_idxstats/.config.vsh.yaml @@ -75,6 +75,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -182,16 +185,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_idxstats" executable: "target/nextflow/samtools/samtools_idxstats/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/samtools/samtools_idxstats/main.nf b/target/nextflow/samtools/samtools_idxstats/main.nf index 2729e99b..e52af7f6 100644 --- a/target/nextflow/samtools/samtools_idxstats/main.nf +++ b/target/nextflow/samtools/samtools_idxstats/main.nf @@ -1,6 +1,6 @@ // samtools_idxstats main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2912,6 +3143,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3039,16 +3274,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_idxstats", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/samtools/samtools_index/.config.vsh.yaml b/target/nextflow/samtools/samtools_index/.config.vsh.yaml index a4ffaa2e..ed45e4d9 100644 --- a/target/nextflow/samtools/samtools_index/.config.vsh.yaml +++ b/target/nextflow/samtools/samtools_index/.config.vsh.yaml @@ -84,6 +84,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -188,16 +191,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_index" executable: "target/nextflow/samtools/samtools_index/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/samtools/samtools_index/main.nf b/target/nextflow/samtools/samtools_index/main.nf index 85b710da..dd4a92b0 100644 --- a/target/nextflow/samtools/samtools_index/main.nf +++ b/target/nextflow/samtools/samtools_index/main.nf @@ -1,6 +1,6 @@ // samtools_index main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2928,6 +3159,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3052,16 +3287,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_index", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/samtools/samtools_sort/.config.vsh.yaml b/target/nextflow/samtools/samtools_sort/.config.vsh.yaml index d341c2f9..fc9783da 100644 --- a/target/nextflow/samtools/samtools_sort/.config.vsh.yaml +++ b/target/nextflow/samtools/samtools_sort/.config.vsh.yaml @@ -227,6 +227,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -331,16 +334,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_sort" executable: "target/nextflow/samtools/samtools_sort/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/samtools/samtools_sort/main.nf b/target/nextflow/samtools/samtools_sort/main.nf index 81aa788c..e269b9f2 100644 --- a/target/nextflow/samtools/samtools_sort/main.nf +++ b/target/nextflow/samtools/samtools_sort/main.nf @@ -1,6 +1,6 @@ // samtools_sort main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3100,6 +3331,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3224,16 +3459,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_sort", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/samtools/samtools_stats/.config.vsh.yaml b/target/nextflow/samtools/samtools_stats/.config.vsh.yaml index 208326ce..4db9eafc 100644 --- a/target/nextflow/samtools/samtools_stats/.config.vsh.yaml +++ b/target/nextflow/samtools/samtools_stats/.config.vsh.yaml @@ -295,6 +295,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -400,16 +403,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_stats" executable: "target/nextflow/samtools/samtools_stats/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/samtools/samtools_stats/main.nf b/target/nextflow/samtools/samtools_stats/main.nf index 958eac0c..00f50e56 100644 --- a/target/nextflow/samtools/samtools_stats/main.nf +++ b/target/nextflow/samtools/samtools_stats/main.nf @@ -1,6 +1,6 @@ // samtools_stats main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3169,6 +3400,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3294,16 +3529,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_stats", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/samtools/samtools_view/.config.vsh.yaml b/target/nextflow/samtools/samtools_view/.config.vsh.yaml index 50f9b5d3..f281231b 100644 --- a/target/nextflow/samtools/samtools_view/.config.vsh.yaml +++ b/target/nextflow/samtools/samtools_view/.config.vsh.yaml @@ -559,6 +559,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -664,16 +667,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_view" executable: "target/nextflow/samtools/samtools_view/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/samtools/samtools_view/main.nf b/target/nextflow/samtools/samtools_view/main.nf index 40a8728d..68ff5f34 100644 --- a/target/nextflow/samtools/samtools_view/main.nf +++ b/target/nextflow/samtools/samtools_view/main.nf @@ -1,6 +1,6 @@ // samtools_view main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3350,6 +3581,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3475,16 +3710,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_view", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/seqtk/seqtk_sample/.config.vsh.yaml b/target/nextflow/seqtk/seqtk_sample/.config.vsh.yaml index 45d3777c..a76aed02 100644 --- a/target/nextflow/seqtk/seqtk_sample/.config.vsh.yaml +++ b/target/nextflow/seqtk/seqtk_sample/.config.vsh.yaml @@ -80,6 +80,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -172,16 +175,16 @@ build_info: engine: "docker|native" output: "target/nextflow/seqtk/seqtk_sample" executable: "target/nextflow/seqtk/seqtk_sample/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/seqtk/seqtk_sample/main.nf b/target/nextflow/seqtk/seqtk_sample/main.nf index 2c87f2dc..483799d6 100644 --- a/target/nextflow/seqtk/seqtk_sample/main.nf +++ b/target/nextflow/seqtk/seqtk_sample/main.nf @@ -1,6 +1,6 @@ // seqtk_sample main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2922,6 +3153,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3029,16 +3264,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/seqtk/seqtk_sample", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/seqtk/seqtk_subseq/.config.vsh.yaml b/target/nextflow/seqtk/seqtk_subseq/.config.vsh.yaml index 8807886a..19cdb54f 100644 --- a/target/nextflow/seqtk/seqtk_subseq/.config.vsh.yaml +++ b/target/nextflow/seqtk/seqtk_subseq/.config.vsh.yaml @@ -99,6 +99,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -195,16 +198,16 @@ build_info: engine: "docker|native" output: "target/nextflow/seqtk/seqtk_subseq" executable: "target/nextflow/seqtk/seqtk_subseq/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/seqtk/seqtk_subseq/main.nf b/target/nextflow/seqtk/seqtk_subseq/main.nf index d5da1598..b366919c 100644 --- a/target/nextflow/seqtk/seqtk_subseq/main.nf +++ b/target/nextflow/seqtk/seqtk_subseq/main.nf @@ -1,6 +1,6 @@ // seqtk_subseq main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2944,6 +3175,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3059,16 +3294,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/seqtk/seqtk_subseq", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/sgdemux/.config.vsh.yaml b/target/nextflow/sgdemux/.config.vsh.yaml index 4161714b..0e48c531 100644 --- a/target/nextflow/sgdemux/.config.vsh.yaml +++ b/target/nextflow/sgdemux/.config.vsh.yaml @@ -328,6 +328,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -428,16 +431,16 @@ build_info: engine: "docker|native" output: "target/nextflow/sgdemux" executable: "target/nextflow/sgdemux/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/sgdemux/main.nf b/target/nextflow/sgdemux/main.nf index 2defb908..cb95dced 100644 --- a/target/nextflow/sgdemux/main.nf +++ b/target/nextflow/sgdemux/main.nf @@ -1,6 +1,6 @@ // sgdemux main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3181,6 +3412,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3302,16 +3537,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/sgdemux", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/snpeff/.config.vsh.yaml b/target/nextflow/snpeff/.config.vsh.yaml index b597724e..fe4fcb79 100644 --- a/target/nextflow/snpeff/.config.vsh.yaml +++ b/target/nextflow/snpeff/.config.vsh.yaml @@ -523,6 +523,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -627,16 +630,16 @@ build_info: engine: "docker|native" output: "target/nextflow/snpeff" executable: "target/nextflow/snpeff/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/snpeff/main.nf b/target/nextflow/snpeff/main.nf index 3bfe5c5b..be55883e 100644 --- a/target/nextflow/snpeff/main.nf +++ b/target/nextflow/snpeff/main.nf @@ -1,6 +1,6 @@ // snpeff main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3430,6 +3661,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3554,16 +3789,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/snpeff", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/sortmerna/.config.vsh.yaml b/target/nextflow/sortmerna/.config.vsh.yaml index fff395c0..68783c77 100644 --- a/target/nextflow/sortmerna/.config.vsh.yaml +++ b/target/nextflow/sortmerna/.config.vsh.yaml @@ -487,6 +487,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -593,16 +596,16 @@ build_info: engine: "docker|native" output: "target/nextflow/sortmerna" executable: "target/nextflow/sortmerna/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/sortmerna/main.nf b/target/nextflow/sortmerna/main.nf index ba848a9f..1de70e61 100644 --- a/target/nextflow/sortmerna/main.nf +++ b/target/nextflow/sortmerna/main.nf @@ -1,6 +1,6 @@ // sortmerna main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3333,6 +3564,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3459,16 +3694,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/sortmerna", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/star/star_align_reads/.config.vsh.yaml b/target/nextflow/star/star_align_reads/.config.vsh.yaml index 86213b48..f05a813f 100644 --- a/target/nextflow/star/star_align_reads/.config.vsh.yaml +++ b/target/nextflow/star/star_align_reads/.config.vsh.yaml @@ -2540,6 +2540,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -2662,16 +2665,16 @@ build_info: engine: "docker|native" output: "target/nextflow/star/star_align_reads" executable: "target/nextflow/star/star_align_reads/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/star/star_align_reads/main.nf b/target/nextflow/star/star_align_reads/main.nf index 97daee43..845d9faa 100644 --- a/target/nextflow/star/star_align_reads/main.nf +++ b/target/nextflow/star/star_align_reads/main.nf @@ -1,6 +1,6 @@ // star_align_reads main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1727,8 +1886,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1741,7 +1898,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1753,33 +1910,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -5794,6 +6025,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -5942,16 +6177,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/star/star_align_reads", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/star/star_genome_generate/.config.vsh.yaml b/target/nextflow/star/star_genome_generate/.config.vsh.yaml index 2a127406..a3d3a324 100644 --- a/target/nextflow/star/star_genome_generate/.config.vsh.yaml +++ b/target/nextflow/star/star_genome_generate/.config.vsh.yaml @@ -221,6 +221,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -332,16 +335,16 @@ build_info: engine: "docker|native" output: "target/nextflow/star/star_genome_generate" executable: "target/nextflow/star/star_genome_generate/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/star/star_genome_generate/main.nf b/target/nextflow/star/star_genome_generate/main.nf index 17e745a0..a84bf6cd 100644 --- a/target/nextflow/star/star_genome_generate/main.nf +++ b/target/nextflow/star/star_genome_generate/main.nf @@ -1,6 +1,6 @@ // star_genome_generate main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3063,6 +3294,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3194,16 +3429,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/star/star_genome_generate", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ @@ -3292,10 +3527,10 @@ STAR \\\\ \\${par_genome_sa_index_nbases:+--genomeSAindexNbases "\\${par_genome_sa_index_nbases}"} \\\\ \\${par_sjdb_gtf_chr_prefix:+--sjdbGTFchrPrefix "\\${par_sjdb_gtf_chr_prefix}"} \\\\ \\${par_sjdb_gtf_feature_exon:+--sjdbGTFfeatureExon "\\${par_sjdb_gtf_feature_exon}"} \\\\ - \\${par_sjdb_gtf_tag_exon_parent_transcript:+--sjdbGTFtag_exon_parent_transcript "\\${par_sjdb_gtf_tag_exon_parent_transcript}"} \\\\ - \\${par_sjdb_gtf_tag_exon_parent_gene:+--sjdbGTFtag_exon_parent_gene "\\${par_sjdb_gtf_tag_exon_parent_gene}"} \\\\ - \\${par_sjdb_gtf_tag_exon_parent_geneName:+--sjdbGTFtag_exon_parent_geneName "\\${par_sjdb_gtf_tag_exon_parent_geneName}"} \\\\ - \\${par_sjdb_gtf_tag_exon_parent_geneType:+--sjdbGTFtag_exon_parent_geneType "\\${sjdbGTFtag_exon_parent_geneType}"} \\\\ + \\${par_sjdb_gtf_tag_exon_parent_transcript:+--sjdbGTFtagExonParentTranscript "\\${par_sjdb_gtf_tag_exon_parent_transcript}"} \\\\ + \\${par_sjdb_gtf_tag_exon_parent_gene:+--sjdbGTFtagExonParentGene "\\${par_sjdb_gtf_tag_exon_parent_gene}"} \\\\ + \\${sjdb_gtf_tag_exon_parent_gene_name:+--sjdbGTFtagExonParentGeneName "\\${sjdb_gtf_tag_exon_parent_gene_name}"} \\\\ + \\${sjdb_gtf_tag_exon_parent_gene_type:+--sjdbGTFtagExonParentGeneType "\\${sjdb_gtf_tag_exon_parent_gene_type}"} \\\\ \\${par_limit_genome_generate_ram:+--limitGenomeGenerateRAM "\\${par_limit_genome_generate_ram}"} \\\\ \\${par_genome_chr_bin_nbits:+--genomeChrBinNbits "\\${par_genome_chr_bin_nbits}"} \\\\ \\${par_genome_sa_sparse_d:+--genomeSAsparseD "\\${par_genome_sa_sparse_d}"} \\\\ diff --git a/target/nextflow/trimgalore/.config.vsh.yaml b/target/nextflow/trimgalore/.config.vsh.yaml index a46f4921..296794ad 100644 --- a/target/nextflow/trimgalore/.config.vsh.yaml +++ b/target/nextflow/trimgalore/.config.vsh.yaml @@ -668,6 +668,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -769,16 +772,16 @@ build_info: engine: "docker|native" output: "target/nextflow/trimgalore" executable: "target/nextflow/trimgalore/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/trimgalore/main.nf b/target/nextflow/trimgalore/main.nf index c46ce511..dacfdc3b 100644 --- a/target/nextflow/trimgalore/main.nf +++ b/target/nextflow/trimgalore/main.nf @@ -1,6 +1,6 @@ // trimgalore main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3438,6 +3669,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3559,16 +3794,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/trimgalore", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml b/target/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml index 6296dff4..d268cb7d 100644 --- a/target/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml +++ b/target/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml @@ -509,6 +509,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -610,16 +613,16 @@ build_info: engine: "docker|native" output: "target/nextflow/umi_tools/umi_tools_dedup" executable: "target/nextflow/umi_tools/umi_tools_dedup/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/umi_tools/umi_tools_dedup/main.nf b/target/nextflow/umi_tools/umi_tools_dedup/main.nf index 910e67d1..4e0c2739 100644 --- a/target/nextflow/umi_tools/umi_tools_dedup/main.nf +++ b/target/nextflow/umi_tools/umi_tools_dedup/main.nf @@ -1,6 +1,6 @@ // umi_tools_dedup main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1885,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1897,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1909,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3364,6 +3595,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3486,16 +3721,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/umi_tools/umi_tools_dedup", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml b/target/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml index dea32d25..0fac1e2f 100644 --- a/target/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml +++ b/target/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml @@ -331,6 +331,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -433,16 +436,16 @@ build_info: engine: "docker|native" output: "target/nextflow/umi_tools/umi_tools_extract" executable: "target/nextflow/umi_tools/umi_tools_extract/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/umi_tools/umi_tools_extract/main.nf b/target/nextflow/umi_tools/umi_tools_extract/main.nf index af503a79..3b4950bf 100644 --- a/target/nextflow/umi_tools/umi_tools_extract/main.nf +++ b/target/nextflow/umi_tools/umi_tools_extract/main.nf @@ -1,6 +1,6 @@ // umi_tools_extract main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3175,6 +3406,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3298,16 +3533,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/umi_tools/umi_tools_extract", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml b/target/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml index 30bbe4a2..0c3f2392 100644 --- a/target/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml +++ b/target/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml @@ -153,6 +153,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -255,16 +258,16 @@ build_info: engine: "docker|native" output: "target/nextflow/umi_tools/umi_tools_prepareforrsem" executable: "target/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.2" + git_commit: "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-30-g5f6516e" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf b/target/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf index df5b652b..6fbffff0 100644 --- a/target/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf +++ b/target/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf @@ -1,6 +1,6 @@ // umi_tools_prepareforrsem main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2997,6 +3228,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3120,16 +3355,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/umi_tools/umi_tools_prepareforrsem", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.2", + "git_commit" : "5f6516e9c0d95c84f3d4159a67d3de19d3ae1fde", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-30-g5f6516e" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "src", "target" : "target", "config_mods" : [