From c00bcda3f8fc85d1b2ea88d99624479377499ed6 Mon Sep 17 00:00:00 2001 From: CI Date: Tue, 6 May 2025 20:05:48 +0000 Subject: [PATCH] Build branch main with version main (f52978a) Build pipeline: viash-hub.rnaseq.main-k4ttq Source commit: https://github.com/viash-hub/rnaseq/commit/f52978a0e25cae182b7874b4b8aa3afc183e880e Source message: Fix summarizedexperiment build (#42) --- CHANGELOG.md | 9 +- _viash.yaml | 4 +- src/assets/optional_file.txt | 1 + src/assets/required_file.txt | 1 + src/deseq2_qc/config.vsh.yaml | 10 +- src/deseq2_qc/script.r | 5 +- src/extra/copy_if_exists/config.vsh.yaml | 32 + src/extra/copy_if_exists/script.sh | 25 + src/summarizedexperiment/config.vsh.yaml | 4 +- .../bbmap/bbmap_bbsplit/.config.vsh.yaml | 13 +- .../main/nextflow/bbmap/bbmap_bbsplit/main.nf | 471 +- .../bbmap/bbmap_bbsplit/nextflow_schema.json | 6 +- .../bedtools_genomecov/.config.vsh.yaml | 13 +- .../bedtools/bedtools_genomecov/main.nf | 471 +- .../main/nextflow/fastp/.config.vsh.yaml | 13 +- .../vsh/biobox/main/nextflow/fastp/main.nf | 471 +- .../main/nextflow/fastp/nextflow_schema.json | 6 +- .../main/nextflow/fastqc/.config.vsh.yaml | 13 +- .../vsh/biobox/main/nextflow/fastqc/main.nf | 471 +- .../nextflow/featurecounts/.config.vsh.yaml | 13 +- .../main/nextflow/featurecounts/main.nf | 473 +- .../featurecounts/nextflow_schema.json | 6 +- .../nextflow/fq_subsample/.config.vsh.yaml | 13 +- .../biobox/main/nextflow/fq_subsample/main.nf | 471 +- .../fq_subsample/nextflow_schema.json | 12 +- .../main/nextflow/gffread/.config.vsh.yaml | 13 +- .../vsh/biobox/main/nextflow/gffread/main.nf | 471 +- .../kallisto/kallisto_index/.config.vsh.yaml | 13 +- .../nextflow/kallisto/kallisto_index/main.nf | 471 +- .../kallisto_index/nextflow_schema.json | 6 +- .../kallisto/kallisto_quant/.config.vsh.yaml | 13 +- .../nextflow/kallisto/kallisto_quant/main.nf | 471 +- .../kallisto_quant/nextflow_schema.json | 12 +- .../main/nextflow/multiqc/.config.vsh.yaml | 13 +- 
.../vsh/biobox/main/nextflow/multiqc/main.nf | 471 +- .../nextflow/multiqc/nextflow_schema.json | 12 +- .../qualimap/qualimap_rnaseq/.config.vsh.yaml | 13 +- .../nextflow/qualimap/qualimap_rnaseq/main.nf | 471 +- .../qualimap_rnaseq/nextflow_schema.json | 6 +- .../.config.vsh.yaml | 13 +- .../rsem/rsem_calculate_expression/main.nf | 471 +- .../rsem_prepare_reference/.config.vsh.yaml | 13 +- .../rsem/rsem_prepare_reference/main.nf | 471 +- .../nextflow_schema.json | 6 +- .../rseqc/rseqc_bamstat/.config.vsh.yaml | 13 +- .../main/nextflow/rseqc/rseqc_bamstat/main.nf | 471 +- .../rseqc/rseqc_bamstat/nextflow_schema.json | 6 +- .../rseqc_inferexperiment/.config.vsh.yaml | 13 +- .../rseqc/rseqc_inferexperiment/main.nf | 471 +- .../rseqc_inner_distance/.config.vsh.yaml | 13 +- .../rseqc/rseqc_inner_distance/main.nf | 471 +- .../rseqc_inner_distance/nextflow_schema.json | 30 +- .../salmon/salmon_index/.config.vsh.yaml | 13 +- .../main/nextflow/salmon/salmon_index/main.nf | 473 +- .../salmon/salmon_index/nextflow_schema.json | 6 +- .../salmon/salmon_quant/.config.vsh.yaml | 13 +- .../main/nextflow/salmon/salmon_quant/main.nf | 471 +- .../salmon/salmon_quant/nextflow_schema.json | 12 +- .../samtools_flagstat/.config.vsh.yaml | 13 +- .../samtools/samtools_flagstat/main.nf | 471 +- .../samtools_idxstats/.config.vsh.yaml | 13 +- .../samtools/samtools_idxstats/main.nf | 471 +- .../samtools/samtools_index/.config.vsh.yaml | 13 +- .../nextflow/samtools/samtools_index/main.nf | 471 +- .../samtools/samtools_sort/.config.vsh.yaml | 13 +- .../nextflow/samtools/samtools_sort/main.nf | 471 +- .../samtools/samtools_stats/.config.vsh.yaml | 13 +- .../nextflow/samtools/samtools_stats/main.nf | 471 +- .../main/nextflow/sortmerna/.config.vsh.yaml | 13 +- .../biobox/main/nextflow/sortmerna/main.nf | 471 +- .../nextflow/sortmerna/nextflow_schema.json | 12 +- .../star/star_align_reads/.config.vsh.yaml | 13 +- .../nextflow/star/star_align_reads/main.nf | 473 +- 
.../star_genome_generate/.config.vsh.yaml | 13 +- .../star/star_genome_generate/main.nf | 479 ++- .../star_genome_generate/nextflow_schema.json | 6 +- .../main/nextflow/trimgalore/.config.vsh.yaml | 13 +- .../biobox/main/nextflow/trimgalore/main.nf | 471 +- .../nextflow/trimgalore/nextflow_schema.json | 6 +- .../umi_tools_dedup/.config.vsh.yaml | 13 +- .../umi_tools/umi_tools_dedup/main.nf | 471 +- .../umi_tools_dedup/nextflow_schema.json | 6 +- .../umi_tools_extract/.config.vsh.yaml | 13 +- .../umi_tools/umi_tools_extract/main.nf | 471 +- .../umi_tools_extract/nextflow_schema.json | 30 +- .../umi_tools_prepareforrsem/.config.vsh.yaml | 13 +- .../umi_tools_prepareforrsem/main.nf | 471 +- .../nextflow_schema.json | 12 +- .../bedtools_genomecov/.config.vsh.yaml | 13 +- .../bedtools_genomecov/bedtools_genomecov | 92 +- .../cat_additional_fasta/.config.vsh.yaml | 13 +- .../cat_additional_fasta/cat_additional_fasta | 102 +- target/executable/cat_fastq/.config.vsh.yaml | 13 +- target/executable/cat_fastq/cat_fastq | 86 +- .../copy_if_exists/.config.vsh.yaml | 183 + .../executable/copy_if_exists/copy_if_exists | 1136 +++++ .../copy_if_exists/nextflow_labels.config | 44 + .../copy_if_exists/optional_file.txt | 1 + .../copy_if_exists/required_file.txt | 1 + target/executable/deseq2_qc/.config.vsh.yaml | 25 +- target/executable/deseq2_qc/deseq2_qc | 160 +- target/executable/dupradar/.config.vsh.yaml | 14 +- target/executable/dupradar/dupradar | 172 +- .../executable/getchromsizes/.config.vsh.yaml | 13 +- target/executable/getchromsizes/getchromsizes | 82 +- target/executable/gtf2bed/.config.vsh.yaml | 13 +- target/executable/gtf2bed/gtf2bed | 66 +- target/executable/gtf_filter/.config.vsh.yaml | 13 +- target/executable/gtf_filter/gtf_filter | 82 +- target/executable/gunzip/.config.vsh.yaml | 13 +- target/executable/gunzip/gunzip | 66 +- .../multiqc_custom_biotype/.config.vsh.yaml | 13 +- .../multiqc_custom_biotype | 94 +- .../picard_markduplicates/.config.vsh.yaml | 13 +- 
.../picard_markduplicates | 118 +- .../prepare_multiqc_input/.config.vsh.yaml | 13 +- .../prepare_multiqc_input | 258 +- .../.config.vsh.yaml | 13 +- .../preprocess_transcripts_fasta | 66 +- .../preseq_lcextrap/.config.vsh.yaml | 13 +- .../preseq_lcextrap/preseq_lcextrap | 82 +- .../rsem_merge_counts/.config.vsh.yaml | 13 +- .../rsem_merge_counts/rsem_merge_counts | 108 +- .../rseqc_junctionannotation/.config.vsh.yaml | 13 +- .../rseqc_junctionannotation | 166 +- .../rseqc_junctionsaturation/.config.vsh.yaml | 13 +- .../rseqc_junctionsaturation | 172 +- .../rseqc_readdistribution/.config.vsh.yaml | 13 +- .../rseqc_readdistribution | 76 +- .../rseqc_readduplication/.config.vsh.yaml | 13 +- .../rseqc_readduplication | 126 +- .../rseqc/rseqc_tin/.config.vsh.yaml | 13 +- target/executable/rseqc/rseqc_tin/rseqc_tin | 132 +- target/executable/sortmerna/.config.vsh.yaml | 13 +- target/executable/sortmerna/sortmerna | 114 +- target/executable/stringtie/.config.vsh.yaml | 13 +- target/executable/stringtie/stringtie | 122 +- .../summarizedexperiment/.config.vsh.yaml | 21 +- .../summarizedexperiment/summarizedexperiment | 112 +- target/executable/tx2gene/.config.vsh.yaml | 13 +- target/executable/tx2gene/tx2gene | 96 +- target/executable/tximport/.config.vsh.yaml | 14 +- target/executable/tximport/tximport | 148 +- .../executable/ucsc/bedclip/.config.vsh.yaml | 13 +- target/executable/ucsc/bedclip/bedclip | 76 +- .../ucsc/bedgraphtobigwig/.config.vsh.yaml | 13 +- .../ucsc/bedgraphtobigwig/bedgraphtobigwig | 76 +- .../.config.vsh.yaml | 13 +- .../genome_alignment_and_quant | 23 +- .../merge_quant_results/.config.vsh.yaml | 13 +- .../merge_quant_results/merge_quant_results | 23 +- .../post_processing/.config.vsh.yaml | 13 +- .../workflows/post_processing/post_processing | 23 +- .../workflows/pre_processing/.config.vsh.yaml | 13 +- .../workflows/pre_processing/pre_processing | 23 +- .../workflows/prepare_genome/.config.vsh.yaml | 13 +- .../workflows/prepare_genome/prepare_genome 
| 23 +- .../.config.vsh.yaml | 13 +- .../pseudo_alignment_and_quant | 23 +- .../quality_control/.config.vsh.yaml | 13 +- .../workflows/quality_control/quality_control | 23 +- .../workflows/rnaseq/.config.vsh.yaml | 13 +- target/executable/workflows/rnaseq/rnaseq | 23 +- .../bedtools_genomecov/.config.vsh.yaml | 13 +- target/nextflow/bedtools_genomecov/main.nf | 383 +- .../bedtools_genomecov/nextflow_schema.json | 12 +- .../cat_additional_fasta/.config.vsh.yaml | 13 +- target/nextflow/cat_additional_fasta/main.nf | 383 +- .../cat_additional_fasta/nextflow_schema.json | 12 +- target/nextflow/cat_fastq/.config.vsh.yaml | 13 +- target/nextflow/cat_fastq/main.nf | 383 +- .../nextflow/cat_fastq/nextflow_schema.json | 12 +- .../nextflow/copy_if_exists/.config.vsh.yaml | 183 + target/nextflow/copy_if_exists/main.nf | 3813 +++++++++++++++++ .../nextflow/copy_if_exists/nextflow.config | 124 + .../copy_if_exists/nextflow_labels.config | 44 + .../copy_if_exists/nextflow_schema.json | 105 + .../nextflow/copy_if_exists/optional_file.txt | 1 + .../nextflow/copy_if_exists/required_file.txt | 1 + target/nextflow/deseq2_qc/.config.vsh.yaml | 25 +- target/nextflow/deseq2_qc/main.nf | 402 +- .../nextflow/deseq2_qc/nextflow_schema.json | 18 +- target/nextflow/dupradar/.config.vsh.yaml | 14 +- target/nextflow/dupradar/main.nf | 386 +- target/nextflow/dupradar/nextflow_schema.json | 42 +- .../nextflow/getchromsizes/.config.vsh.yaml | 13 +- target/nextflow/getchromsizes/main.nf | 383 +- .../getchromsizes/nextflow_schema.json | 18 +- target/nextflow/gtf2bed/.config.vsh.yaml | 13 +- target/nextflow/gtf2bed/main.nf | 383 +- target/nextflow/gtf2bed/nextflow_schema.json | 6 +- target/nextflow/gtf_filter/.config.vsh.yaml | 13 +- target/nextflow/gtf_filter/main.nf | 383 +- .../nextflow/gtf_filter/nextflow_schema.json | 6 +- target/nextflow/gunzip/.config.vsh.yaml | 13 +- target/nextflow/gunzip/main.nf | 383 +- target/nextflow/gunzip/nextflow_schema.json | 6 +- 
.../multiqc_custom_biotype/.config.vsh.yaml | 13 +- .../nextflow/multiqc_custom_biotype/main.nf | 383 +- .../nextflow_schema.json | 12 +- .../picard_markduplicates/.config.vsh.yaml | 13 +- target/nextflow/picard_markduplicates/main.nf | 383 +- .../nextflow_schema.json | 18 +- .../prepare_multiqc_input/.config.vsh.yaml | 13 +- target/nextflow/prepare_multiqc_input/main.nf | 383 +- .../nextflow_schema.json | 6 +- .../.config.vsh.yaml | 13 +- .../preprocess_transcripts_fasta/main.nf | 383 +- .../nextflow_schema.json | 6 +- .../nextflow/preseq_lcextrap/.config.vsh.yaml | 13 +- target/nextflow/preseq_lcextrap/main.nf | 383 +- .../preseq_lcextrap/nextflow_schema.json | 6 +- .../rsem_merge_counts/.config.vsh.yaml | 13 +- target/nextflow/rsem_merge_counts/main.nf | 383 +- .../rsem_merge_counts/nextflow_schema.json | 24 +- .../rseqc_junctionannotation/.config.vsh.yaml | 13 +- .../rseqc/rseqc_junctionannotation/main.nf | 383 +- .../nextflow_schema.json | 42 +- .../rseqc_junctionsaturation/.config.vsh.yaml | 13 +- .../rseqc/rseqc_junctionsaturation/main.nf | 383 +- .../nextflow_schema.json | 12 +- .../rseqc_readdistribution/.config.vsh.yaml | 13 +- .../rseqc/rseqc_readdistribution/main.nf | 383 +- .../nextflow_schema.json | 6 +- .../rseqc_readduplication/.config.vsh.yaml | 13 +- .../rseqc/rseqc_readduplication/main.nf | 383 +- .../nextflow_schema.json | 24 +- .../nextflow/rseqc/rseqc_tin/.config.vsh.yaml | 13 +- target/nextflow/rseqc/rseqc_tin/main.nf | 383 +- .../rseqc/rseqc_tin/nextflow_schema.json | 12 +- target/nextflow/sortmerna/.config.vsh.yaml | 13 +- target/nextflow/sortmerna/main.nf | 383 +- .../nextflow/sortmerna/nextflow_schema.json | 18 +- target/nextflow/stringtie/.config.vsh.yaml | 13 +- target/nextflow/stringtie/main.nf | 383 +- .../nextflow/stringtie/nextflow_schema.json | 18 +- .../summarizedexperiment/.config.vsh.yaml | 21 +- target/nextflow/summarizedexperiment/main.nf | 396 +- .../summarizedexperiment/nextflow_schema.json | 6 +- 
target/nextflow/tx2gene/.config.vsh.yaml | 13 +- target/nextflow/tx2gene/main.nf | 383 +- target/nextflow/tx2gene/nextflow_schema.json | 6 +- target/nextflow/tximport/.config.vsh.yaml | 14 +- target/nextflow/tximport/main.nf | 386 +- target/nextflow/tximport/nextflow_schema.json | 48 +- target/nextflow/ucsc/bedclip/.config.vsh.yaml | 13 +- target/nextflow/ucsc/bedclip/main.nf | 383 +- .../ucsc/bedclip/nextflow_schema.json | 6 +- .../ucsc/bedgraphtobigwig/.config.vsh.yaml | 13 +- target/nextflow/ucsc/bedgraphtobigwig/main.nf | 383 +- .../bedgraphtobigwig/nextflow_schema.json | 6 +- .../.config.vsh.yaml | 13 +- .../genome_alignment_and_quant/main.nf | 383 +- .../nextflow_schema.json | 120 +- .../merge_quant_results/.config.vsh.yaml | 13 +- .../workflows/merge_quant_results/main.nf | 383 +- .../merge_quant_results/nextflow_schema.json | 6 +- .../post_processing/.config.vsh.yaml | 13 +- .../workflows/post_processing/main.nf | 383 +- .../post_processing/nextflow_schema.json | 84 +- .../workflows/pre_processing/.config.vsh.yaml | 13 +- .../nextflow/workflows/pre_processing/main.nf | 383 +- .../pre_processing/nextflow_schema.json | 102 +- .../workflows/prepare_genome/.config.vsh.yaml | 13 +- .../nextflow/workflows/prepare_genome/main.nf | 383 +- .../prepare_genome/nextflow_schema.json | 66 +- .../.config.vsh.yaml | 13 +- .../pseudo_alignment_and_quant/main.nf | 383 +- .../nextflow_schema.json | 24 +- .../quality_control/.config.vsh.yaml | 13 +- .../workflows/quality_control/main.nf | 383 +- .../quality_control/nextflow_schema.json | 330 +- .../workflows/rnaseq/.config.vsh.yaml | 13 +- target/nextflow/workflows/rnaseq/main.nf | 383 +- .../workflows/rnaseq/nextflow_schema.json | 660 +-- 275 files changed, 31239 insertions(+), 8651 deletions(-) create mode 100644 src/assets/optional_file.txt create mode 100644 src/assets/required_file.txt create mode 100644 src/extra/copy_if_exists/config.vsh.yaml create mode 100644 src/extra/copy_if_exists/script.sh create mode 100644 
target/executable/copy_if_exists/.config.vsh.yaml create mode 100755 target/executable/copy_if_exists/copy_if_exists create mode 100644 target/executable/copy_if_exists/nextflow_labels.config create mode 100644 target/executable/copy_if_exists/optional_file.txt create mode 100644 target/executable/copy_if_exists/required_file.txt create mode 100644 target/nextflow/copy_if_exists/.config.vsh.yaml create mode 100644 target/nextflow/copy_if_exists/main.nf create mode 100644 target/nextflow/copy_if_exists/nextflow.config create mode 100644 target/nextflow/copy_if_exists/nextflow_labels.config create mode 100644 target/nextflow/copy_if_exists/nextflow_schema.json create mode 100644 target/nextflow/copy_if_exists/optional_file.txt create mode 100644 target/nextflow/copy_if_exists/required_file.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c2c965..274e578 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,10 @@ -# RNAseq version x.y.z +# rnaseq v0.2.1 -## Changes +## Bug fixes -- Add resources labels -- Increase the number of components that are fetched from BioBox +* Fix `summarizedexperiment` build PR (#42). + +* Fix an issue with the `deseq2_qc` component not being able to create the DESeq2 object (PR #41). ## Known issues diff --git a/_viash.yaml b/_viash.yaml index 1a91a45..ada7b11 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -1,13 +1,13 @@ name: rnaseq -viash_version: 0.9.0 +viash_version: 0.9.2 source: src target: target info: test_resources: - - path: gs://viash-hub-test-data/rnaseq/v1 + - path: gs://viash-hub-resources/rnaseq/v1 dest: testData config_mods: | diff --git a/src/assets/optional_file.txt b/src/assets/optional_file.txt new file mode 100644 index 0000000..6462a73 --- /dev/null +++ b/src/assets/optional_file.txt @@ -0,0 +1 @@ +Optional! diff --git a/src/assets/required_file.txt b/src/assets/required_file.txt new file mode 100644 index 0000000..b4dbbb5 --- /dev/null +++ b/src/assets/required_file.txt @@ -0,0 +1 @@ +Required! 
diff --git a/src/deseq2_qc/config.vsh.yaml b/src/deseq2_qc/config.vsh.yaml index 56b557d..8ee1be8 100644 --- a/src/deseq2_qc/config.vsh.yaml +++ b/src/deseq2_qc/config.vsh.yaml @@ -68,8 +68,16 @@ test_resources: engines: - type: docker - image: rocker/r2u:22.04 + image: debian:latest setup: + - type: apt + packages: + - libcurl4-openssl-dev + - r-base + - r-base-core + - libxml2-dev + - procps + - libssl-dev - type: r cran: [ optparse, ggplot2, RColorBrewer, pheatmap, stringr, matrixStats ] bioc: [ DESeq2 ] diff --git a/src/deseq2_qc/script.r b/src/deseq2_qc/script.r index 7e4bc4c..a78c575 100755 --- a/src/deseq2_qc/script.r +++ b/src/deseq2_qc/script.r @@ -8,7 +8,8 @@ par <- list( deseq2_output = "deseq2", pca_multiqc = "pca.vals_mqc.tsv", dists_multiqc = "sample.dists_mqc.tsv", - vst = FALSE + vst = FALSE, + outdir = '.' ) meta <- list( resources_dir = "src/deseq2_qc" @@ -100,7 +101,7 @@ saveRDS(dds, file = sub("\\.dds\\.RData$", ".rds", DDSFile)) ##' @author Gavin Kelly plotPCA_vst <- function(object, ntop = 500, assay = length(assays(object))) { - rv <- rowVars(assay(object, assay)) + rv <- rowVars(assay(object, assay), useNames = TRUE) select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))] pca <- prcomp(t(assay(object, assay)[select, ]), center = TRUE, scale = FALSE) percentVar <- pca$sdev^2 / sum(pca$sdev^2) diff --git a/src/extra/copy_if_exists/config.vsh.yaml b/src/extra/copy_if_exists/config.vsh.yaml new file mode 100644 index 0000000..ffd4da7 --- /dev/null +++ b/src/extra/copy_if_exists/config.vsh.yaml @@ -0,0 +1,32 @@ +name: "copy_if_exists" +argument_groups: + - name: "Input" + arguments: + - name: "--required_file" + type: file + must_exist: false + required: true + example: /tmp/rnaseq_workflow_config/required_file.txt + - name: --optional_file + type: file + must_exist: false + example: /tmp/rnaseq_workflow_config/optional_file.txt + + - name: "Ouput" + arguments: + - name: "--output" + type: file + direction: output + default: 
copy_if_exists_output +resources: + - type: bash_script + path: script.sh + - path: /src/assets/required_file.txt + - path: /src/assets/optional_file.txt + +engines: + - type: docker + image: ubuntu:22.04 +runners: + - type: executable + - type: nextflow diff --git a/src/extra/copy_if_exists/script.sh b/src/extra/copy_if_exists/script.sh new file mode 100644 index 0000000..c14ce5f --- /dev/null +++ b/src/extra/copy_if_exists/script.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -eo pipefail + +mkdir -p $par_output + +# This file is checked by the Nextflow module wrapper +cp $par_required_file "$par_output" + +# If the variable is empty, we use the default one (registered as a resource) +if [ -z $par_optional_file ]; then + echo "No optional_file provided, using the default" + cp $meta_resources_dir/optional_file.txt "$par_output" +else + echo "Optional file provided" + if [ -f $par_optional_file ]; then + cp $par_optional_file "$par_output" + else + # Unreachable: the Viash-generated module checks this + echo "Optional file does not exist" + exit 1 + fi +fi + +echo "Done" diff --git a/src/summarizedexperiment/config.vsh.yaml b/src/summarizedexperiment/config.vsh.yaml index 596d2db..f0968a3 100644 --- a/src/summarizedexperiment/config.vsh.yaml +++ b/src/summarizedexperiment/config.vsh.yaml @@ -39,10 +39,8 @@ resources: engines: - type: docker - image: ubuntu:22.04 + image: rocker/r2u:22.04 setup: - - type: apt - packages: [ r-base, libcurl4-openssl-dev ] - type: r bioc: [ SummarizedExperiment, tximeta ] runners: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml index d115281..f5a0792 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml @@ -267,6 +267,9 @@ test_resources: is_executable: true info: null 
status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -367,16 +370,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bbmap/bbmap_bbsplit" executable: "target/nextflow/bbmap/bbmap_bbsplit/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/main.nf index be42fc5..2593b20 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/main.nf @@ -1,6 +1,6 @@ // bbmap_bbsplit main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -82,64 +82,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? 
null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? 
null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -151,10 +143,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -173,7 +168,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +187,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && 
it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +201,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1664,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1877,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1889,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1901,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1942,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1962,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1993,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2003,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2687,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2845,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2887,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2983,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3101,6 +3327,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3219,16 +3449,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bbmap/bbmap_bbsplit", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3729,7 +3959,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3743,6 +3973,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/nextflow_schema.json index e33875e..7b8b1ed 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/nextflow_schema.json @@ -187,10 +187,10 @@ "index": { "type": "string", - "description": "Type: `file`, default: `$id.$key.index.index`, example: `BBSplit_index`. Location to write the index", - "help_text": "Type: `file`, default: `$id.$key.index.index`, example: `BBSplit_index`. 
Location to write the index.\n" + "description": "Type: `file`, default: `$id.$key.index`, example: `BBSplit_index`. Location to write the index", + "help_text": "Type: `file`, default: `$id.$key.index`, example: `BBSplit_index`. Location to write the index.\n" , - "default":"$id.$key.index.index" + "default":"$id.$key.index" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml index 49a48c4..8062d5b 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml @@ -226,6 +226,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -336,16 +339,16 @@ build_info: engine: "docker|native" output: "target/nextflow/bedtools/bedtools_genomecov" executable: "target/nextflow/bedtools/bedtools_genomecov/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/main.nf index ff93428..c64dca2 100644 --- 
a/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/main.nf @@ -1,6 +1,6 @@ // bedtools_genomecov main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3073,6 +3299,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3206,16 +3436,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bedtools/bedtools_genomecov", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3677,7 +3907,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3691,6 +3921,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/fastp/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastp/.config.vsh.yaml index 14c8381..95b7f31 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/fastp/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastp/.config.vsh.yaml @@ -982,6 +982,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -1082,16 +1085,16 @@ build_info: engine: "docker|native" output: "target/nextflow/fastp" executable: "target/nextflow/fastp/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/fastp/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastp/main.nf index b3df043..00d02b0 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/fastp/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastp/main.nf @@ -1,6 +1,6 @@ // fastp main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? 
null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3901,6 +4127,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -4022,16 +4252,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/fastp", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -4606,7 +4836,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -4620,6 +4850,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return 
scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/fastp/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastp/nextflow_schema.json index bbc0c53..3467000 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/fastp/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastp/nextflow_schema.json @@ -102,10 +102,10 @@ "overlapped_out": { "type": "string", - "description": "Type: `file`, default: `$id.$key.overlapped_out.overlapped_out`. For each read pair, output the overlapped region if it has no any mismatched base", - "help_text": "Type: `file`, default: `$id.$key.overlapped_out.overlapped_out`. 
For each read pair, output the overlapped region if it has no any mismatched base.\n" + "description": "Type: `file`, default: `$id.$key.overlapped_out`. For each read pair, output the overlapped region if it has no any mismatched base", + "help_text": "Type: `file`, default: `$id.$key.overlapped_out`. For each read pair, output the overlapped region if it has no any mismatched base.\n" , - "default":"$id.$key.overlapped_out.overlapped_out" + "default":"$id.$key.overlapped_out" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/.config.vsh.yaml index fdd4416..047327f 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/.config.vsh.yaml @@ -239,6 +239,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -339,16 +342,16 @@ build_info: engine: "docker|native" output: "target/nextflow/fastqc" executable: "target/nextflow/fastqc/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/main.nf index 221b751..cc66f7d 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/main.nf 
+++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/main.nf @@ -1,6 +1,6 @@ // fastqc main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3062,6 +3288,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3181,16 +3411,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/fastqc", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3678,7 +3908,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3692,6 +3922,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return 
scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts/.config.vsh.yaml index f59e1f1..6b958d2 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts/.config.vsh.yaml @@ -543,6 +543,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -644,16 +647,16 @@ build_info: engine: "docker|native" output: "target/nextflow/featurecounts" executable: "target/nextflow/featurecounts/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts/main.nf index d9852f3..cdc6558 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts/main.nf @@ -1,6 +1,6 @@ // featurecounts main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is 
thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? 
null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3427,6 +3653,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3548,16 +3778,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/featurecounts", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3662,7 +3892,7 @@ $( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" ## VIASH END # create temporary directory -tmp_dir=\\$(mktemp -d -p "\\$meta_temp_dir" "\\${meta_functionality_name}_XXXXXX") +tmp_dir=\\$(mktemp -d -p "\\$meta_temp_dir" "\\${meta_name}_XXXXXX") mkdir -p "\\$tmp_dir/temp" # create detailed_results directory if variable is set and directory does not exist @@ -4089,7 +4319,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -4103,6 +4333,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts/nextflow_schema.json index 8b277f2..9276bfa 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts/nextflow_schema.json @@ -573,10 +573,10 @@ "detailed_results": { "type": "string", - "description": "Type: `file`, default: `$id.$key.detailed_results.detailed_results`, example: `detailed_results`. Directory to save the detailed assignment results", - "help_text": "Type: `file`, default: `$id.$key.detailed_results.detailed_results`, example: `detailed_results`. Directory to save the detailed assignment results. Use `--detailed_results_format` to determine the format of the detailed results.\n" + "description": "Type: `file`, default: `$id.$key.detailed_results`, example: `detailed_results`. Directory to save the detailed assignment results", + "help_text": "Type: `file`, default: `$id.$key.detailed_results`, example: `detailed_results`. Directory to save the detailed assignment results. 
Use `--detailed_results_format` to determine the format of the detailed results.\n" , - "default":"$id.$key.detailed_results.detailed_results" + "default":"$id.$key.detailed_results" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/.config.vsh.yaml index d56f5cb..22928b5 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/.config.vsh.yaml @@ -88,6 +88,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -189,16 +192,16 @@ build_info: engine: "docker|native" output: "target/nextflow/fq_subsample" executable: "target/nextflow/fq_subsample/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/main.nf index ae3f2e3..cc6aacd 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/main.nf @@ -1,6 +1,6 @@ // fq_subsample main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 
0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -82,64 +82,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? 
null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -151,10 +143,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -173,7 +168,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +187,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +201,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1664,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1877,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1889,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1901,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1942,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1962,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1993,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2003,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2687,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2845,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2887,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2983,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2914,6 +3140,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3031,16 +3261,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/fq_subsample", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3459,7 +3689,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3473,6 +3703,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { 
return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/nextflow_schema.json index 511bd91..83a6f61 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/nextflow_schema.json @@ -47,10 +47,10 @@ "output_1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_1.output_1`. Sampled read 1 fastq files", - "help_text": "Type: `file`, default: `$id.$key.output_1.output_1`. Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`." 
+ "description": "Type: `file`, default: `$id.$key.output_1`. Sampled read 1 fastq files", + "help_text": "Type: `file`, default: `$id.$key.output_1`. Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`." , - "default":"$id.$key.output_1.output_1" + "default":"$id.$key.output_1" } @@ -58,10 +58,10 @@ "output_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_2.output_2`. Sampled read 2 fastq files", - "help_text": "Type: `file`, default: `$id.$key.output_2.output_2`. Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`." + "description": "Type: `file`, default: `$id.$key.output_2`. Sampled read 2 fastq files", + "help_text": "Type: `file`, default: `$id.$key.output_2`. Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`." , - "default":"$id.$key.output_2.output_2" + "default":"$id.$key.output_2" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/gffread/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/gffread/.config.vsh.yaml index 54dad4b..0e86d7d 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/gffread/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/gffread/.config.vsh.yaml @@ -582,6 +582,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -684,16 +687,16 @@ build_info: engine: "docker|native" output: "target/nextflow/gffread" executable: "target/nextflow/gffread/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection 
of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/gffread/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/gffread/main.nf index 0b9416b..64b6161 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/gffread/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/gffread/main.nf @@ -1,6 +1,6 @@ // gffread main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3482,6 +3708,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3605,16 +3835,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/gffread", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -4192,7 +4422,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -4206,6 +4436,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return 
scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/.config.vsh.yaml index adb39ff..9fed6f8 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/.config.vsh.yaml @@ -113,6 +113,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -217,16 +220,16 @@ build_info: engine: "docker|native" output: "target/nextflow/kallisto/kallisto_index" executable: "target/nextflow/kallisto/kallisto_index/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/main.nf index c324d4a..985e23e 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/main.nf @@ -1,6 +1,6 @@ // kallisto_index main // -// This wrapper script is auto-generated by 
viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -82,64 +82,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -151,10 +143,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -173,7 +168,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +187,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +201,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1664,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1877,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1889,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1901,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1942,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1962,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1993,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2003,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2687,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2845,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2887,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2983,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2948,6 +3174,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3070,16 +3300,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/kallisto/kallisto_index", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3509,7 +3739,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3523,6 +3753,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/nextflow_schema.json index 6d0b616..44e9e57 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/nextflow_schema.json @@ -47,10 +47,10 @@ "index": { "type": "string", - "description": "Type: `file`, default: `$id.$key.index.index`, example: `Kallisto_index`. ", - "help_text": "Type: `file`, default: `$id.$key.index.index`, example: `Kallisto_index`. 
" + "description": "Type: `file`, default: `$id.$key.index`, example: `Kallisto_index`. ", + "help_text": "Type: `file`, default: `$id.$key.index`, example: `Kallisto_index`. " , - "default":"$id.$key.index.index" + "default":"$id.$key.index" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/.config.vsh.yaml index 75ae5e7..3312ba1 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/.config.vsh.yaml @@ -137,6 +137,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -245,16 +248,16 @@ build_info: engine: "docker|native" output: "target/nextflow/kallisto/kallisto_quant" executable: "target/nextflow/kallisto/kallisto_quant/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/main.nf index d8f9d46..f05e904 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/main.nf 
@@ -1,6 +1,6 @@ // kallisto_quant main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -82,64 +82,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -151,10 +143,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -173,7 +168,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +187,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +201,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1664,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1877,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1889,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1901,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1942,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1962,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1993,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2003,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2687,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2845,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2887,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2983,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2975,6 +3201,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3104,16 +3334,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/kallisto/kallisto_quant", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3557,7 +3787,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3571,6 +3801,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/nextflow_schema.json index 40a2444..384e153 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/nextflow_schema.json @@ -47,10 +47,10 @@ "output_dir": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output_dir.output_dir`. Directory to write output to", - "help_text": "Type: `file`, required, default: `$id.$key.output_dir.output_dir`. 
Directory to write output to." + "description": "Type: `file`, required, default: `$id.$key.output_dir`. Directory to write output to", + "help_text": "Type: `file`, required, default: `$id.$key.output_dir`. Directory to write output to." , - "default":"$id.$key.output_dir.output_dir" + "default":"$id.$key.output_dir" } @@ -58,10 +58,10 @@ "log": { "type": "string", - "description": "Type: `file`, default: `$id.$key.log.log`. File containing log information from running kallisto quant", - "help_text": "Type: `file`, default: `$id.$key.log.log`. File containing log information from running kallisto quant" + "description": "Type: `file`, default: `$id.$key.log`. File containing log information from running kallisto quant", + "help_text": "Type: `file`, default: `$id.$key.log`. File containing log information from running kallisto quant" , - "default":"$id.$key.log.log" + "default":"$id.$key.log" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/multiqc/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/multiqc/.config.vsh.yaml index 3475e44..02ad583 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/multiqc/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/multiqc/.config.vsh.yaml @@ -357,6 +357,9 @@ info: doi: "10.1093/bioinformatics/btw354" licence: "GPL v3 or later" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -455,16 +458,16 @@ build_info: engine: "docker|native" output: "target/nextflow/multiqc" executable: "target/nextflow/multiqc/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: 
"biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/multiqc/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/multiqc/main.nf index 71ef4c3..938679b 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/multiqc/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/multiqc/main.nf @@ -1,6 +1,6 @@ // multiqc main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? 
null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? 
null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, 
String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3246,6 +3472,10 @@ meta = [ "licence" : "GPL v3 or later" }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3365,16 +3595,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/multiqc", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3935,7 +4165,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3949,6 +4179,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, 
String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/multiqc/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/multiqc/nextflow_schema.json index 6a3bd5c..04c0a3b 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/multiqc/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/multiqc/nextflow_schema.json @@ -48,10 +48,10 @@ "output_data": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_data.output_data`, example: `multiqc_data`. Output directory for parsed data files", - "help_text": "Type: `file`, default: `$id.$key.output_data.output_data`, example: `multiqc_data`. 
Output directory for parsed data files. If not provided, parsed data will not be published.\n" + "description": "Type: `file`, default: `$id.$key.output_data`, example: `multiqc_data`. Output directory for parsed data files", + "help_text": "Type: `file`, default: `$id.$key.output_data`, example: `multiqc_data`. Output directory for parsed data files. If not provided, parsed data will not be published.\n" , - "default":"$id.$key.output_data.output_data" + "default":"$id.$key.output_data" } @@ -59,10 +59,10 @@ "output_plots": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_plots.output_plots`, example: `multiqc_plots`. Output directory for generated plots", - "help_text": "Type: `file`, default: `$id.$key.output_plots.output_plots`, example: `multiqc_plots`. Output directory for generated plots. If not provided, plots will not be published.\n" + "description": "Type: `file`, default: `$id.$key.output_plots`, example: `multiqc_plots`. Output directory for generated plots", + "help_text": "Type: `file`, default: `$id.$key.output_plots`, example: `multiqc_plots`. Output directory for generated plots. 
If not provided, plots will not be published.\n" , - "default":"$id.$key.output_plots.output_plots" + "default":"$id.$key.output_plots" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml index b4b0553..1729163 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml @@ -161,6 +161,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -263,16 +266,16 @@ build_info: engine: "docker|native" output: "target/nextflow/qualimap/qualimap_rnaseq" executable: "target/nextflow/qualimap/qualimap_rnaseq/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/main.nf index 5acb4ba..aeaef47 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/main.nf @@ -1,6 +1,6 @@ // qualimap_rnaseq main // -// This wrapper script is auto-generated by viash 
0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3005,6 +3231,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3128,16 +3358,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/qualimap/qualimap_rnaseq", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3588,7 +3818,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3602,6 +3832,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/nextflow_schema.json index 37135d5..47e34c9 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/nextflow_schema.json @@ -58,10 +58,10 @@ "counts": { "type": "string", - "description": "Type: `file`, default: `$id.$key.counts.counts`. Output file for computed counts", - "help_text": "Type: `file`, default: `$id.$key.counts.counts`. Output file for computed counts." 
+ "description": "Type: `file`, default: `$id.$key.counts`. Output file for computed counts", + "help_text": "Type: `file`, default: `$id.$key.counts`. Output file for computed counts." , - "default":"$id.$key.counts.counts" + "default":"$id.$key.counts" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml index 4e942c8..a5ce9e0 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml @@ -724,6 +724,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -851,16 +854,16 @@ build_info: engine: "docker|native" output: "target/nextflow/rsem/rsem_calculate_expression" executable: "target/nextflow/rsem/rsem_calculate_expression/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/main.nf index 56e7d17..b0c0a61 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/main.nf +++ 
b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/main.nf @@ -1,6 +1,6 @@ // rsem_calculate_expression main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -82,64 +82,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -151,10 +143,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -173,7 +168,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +187,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +201,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1664,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1877,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1889,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1901,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1942,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1962,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1993,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2003,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2687,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2845,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2887,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2983,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3514,6 +3740,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3660,16 +3890,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/rsem/rsem_calculate_expression", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -4221,7 +4451,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -4235,6 +4465,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, 
String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_prepare_reference/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_prepare_reference/.config.vsh.yaml index 94266b8..9bd2ec3 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_prepare_reference/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_prepare_reference/.config.vsh.yaml @@ -268,6 +268,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -415,16 +418,16 @@ build_info: engine: "docker|native" output: "target/nextflow/rsem/rsem_prepare_reference" executable: "target/nextflow/rsem/rsem_prepare_reference/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_prepare_reference/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_prepare_reference/main.nf index e4f08ad..7014ee8 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_prepare_reference/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_prepare_reference/main.nf @@ -1,6 +1,6 @@ // rsem_prepare_reference main 
// -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3085,6 +3311,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3244,16 +3474,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/rsem/rsem_prepare_reference", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3705,7 +3935,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3719,6 +3949,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_prepare_reference/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_prepare_reference/nextflow_schema.json index 1a04a5e..1d5c516 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_prepare_reference/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_prepare_reference/nextflow_schema.json @@ -47,10 +47,10 @@ "output": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. Directory containing reference files generated by RSEM", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. 
Directory containing reference files generated by RSEM." + "description": "Type: `file`, required, default: `$id.$key.output`. Directory containing reference files generated by RSEM", + "help_text": "Type: `file`, required, default: `$id.$key.output`. Directory containing reference files generated by RSEM." , - "default":"$id.$key.output.output" + "default":"$id.$key.output" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml index a1eb2e2..a54902d 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml @@ -68,6 +68,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -174,16 +177,16 @@ build_info: engine: "docker|native" output: "target/nextflow/rseqc/rseqc_bamstat" executable: "target/nextflow/rseqc/rseqc_bamstat/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/main.nf index 043175f..20ba5b3 100644 --- 
a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/main.nf @@ -1,6 +1,6 @@ // rseqc_bamstat main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2905,6 +3131,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3035,16 +3265,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/rseqc/rseqc_bamstat", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3445,7 +3675,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3459,6 +3689,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/nextflow_schema.json index e84def6..4b31a3d 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/nextflow_schema.json @@ -47,10 +47,10 @@ "output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output.output`. Output file (txt) with mapping quality statistics", - "help_text": "Type: `file`, default: `$id.$key.output.output`. Output file (txt) with mapping quality statistics." 
+ "description": "Type: `file`, default: `$id.$key.output`. Output file (txt) with mapping quality statistics", + "help_text": "Type: `file`, default: `$id.$key.output`. Output file (txt) with mapping quality statistics." , - "default":"$id.$key.output.output" + "default":"$id.$key.output" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml index dd24905..22eda52 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml @@ -96,6 +96,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -200,16 +203,16 @@ build_info: engine: "docker|native" output: "target/nextflow/rseqc/rseqc_inferexperiment" executable: "target/nextflow/rseqc/rseqc_inferexperiment/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/main.nf index 8c96bf6..e25aafc 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/main.nf +++ 
b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/main.nf @@ -1,6 +1,6 @@ // rseqc_inferexperiment main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2942,6 +3168,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3068,16 +3298,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/rseqc/rseqc_inferexperiment", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3481,7 +3711,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3495,6 +3725,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml index 43639d0..be20f97 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml @@ -185,6 +185,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -293,16 +296,16 @@ build_info: engine: "docker|native" output: "target/nextflow/rseqc/rseqc_inner_distance" executable: "target/nextflow/rseqc/rseqc_inner_distance/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/main.nf index 0a0e0ec..10945a9 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/main.nf @@ -1,6 +1,6 @@ // rseqc_inner_distance main // -// This 
wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3035,6 +3261,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3168,16 +3398,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/rseqc/rseqc_inner_distance", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3604,7 +3834,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3618,6 +3848,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/nextflow_schema.json index 9f62271..6bcd0c8 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/nextflow_schema.json @@ -107,10 +107,10 @@ "output_stats": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_stats.output_stats`. 
output file (txt) with summary statistics of inner distances of paired reads", - "help_text": "Type: `file`, default: `$id.$key.output_stats.output_stats`. output file (txt) with summary statistics of inner distances of paired reads" + "description": "Type: `file`, default: `$id.$key.output_stats`. output file (txt) with summary statistics of inner distances of paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_stats`. output file (txt) with summary statistics of inner distances of paired reads" , - "default":"$id.$key.output_stats.output_stats" + "default":"$id.$key.output_stats" } @@ -118,10 +118,10 @@ "output_dist": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_dist.output_dist`. output file (txt) with inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.output_dist.output_dist`. output file (txt) with inner distances of all paired reads" + "description": "Type: `file`, default: `$id.$key.output_dist`. output file (txt) with inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_dist`. output file (txt) with inner distances of all paired reads" , - "default":"$id.$key.output_dist.output_dist" + "default":"$id.$key.output_dist" } @@ -129,10 +129,10 @@ "output_freq": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_freq.output_freq`. output file (txt) with frequencies of inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.output_freq.output_freq`. output file (txt) with frequencies of inner distances of all paired reads" + "description": "Type: `file`, default: `$id.$key.output_freq`. output file (txt) with frequencies of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_freq`. 
output file (txt) with frequencies of inner distances of all paired reads" , - "default":"$id.$key.output_freq.output_freq" + "default":"$id.$key.output_freq" } @@ -140,10 +140,10 @@ "output_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_plot.output_plot`. output file (pdf) with histogram plot of of inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.output_plot.output_plot`. output file (pdf) with histogram plot of of inner distances of all paired reads" + "description": "Type: `file`, default: `$id.$key.output_plot`. output file (pdf) with histogram plot of of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_plot`. output file (pdf) with histogram plot of of inner distances of all paired reads" , - "default":"$id.$key.output_plot.output_plot" + "default":"$id.$key.output_plot" } @@ -151,10 +151,10 @@ "output_plot_r": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_plot_r.output_plot_r`. output file (R) with script of histogram plot of of inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.output_plot_r.output_plot_r`. output file (R) with script of histogram plot of of inner distances of all paired reads" + "description": "Type: `file`, default: `$id.$key.output_plot_r`. output file (R) with script of histogram plot of of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_plot_r`. 
output file (R) with script of histogram plot of of inner distances of all paired reads" , - "default":"$id.$key.output_plot_r.output_plot_r" + "default":"$id.$key.output_plot_r" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index/.config.vsh.yaml index 29e1d6a..568099c 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index/.config.vsh.yaml @@ -176,6 +176,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -276,16 +279,16 @@ build_info: engine: "docker|native" output: "target/nextflow/salmon/salmon_index" executable: "target/nextflow/salmon/salmon_index/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index/main.nf index 33a48b3..b378b13 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index/main.nf @@ -1,6 +1,6 @@ // salmon_index main // -// This wrapper script is auto-generated by viash 0.9.0 and is 
thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3007,6 +3233,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3128,16 +3358,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/salmon/salmon_index", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3224,7 +3454,7 @@ for par in \\${unset_if_false[@]}; do [[ "\\$test_val" == "false" ]] && unset \\$par done -tmp_dir=\\$(mktemp -d -p "\\$meta_temp_dir" "\\${meta_functionality_name}_XXXXXX") +tmp_dir=\\$(mktemp -d -p "\\$meta_temp_dir" "\\${meta_name}_XXXXXX") mkdir -p "\\$tmp_dir/temp" if [[ -f "\\$par_genome" ]] && [[ ! 
"\\$par_decoys" ]]; then @@ -3593,7 +3823,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3607,6 +3837,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index/nextflow_schema.json index d21c0ee..5e67d15 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index/nextflow_schema.json @@ -153,10 +153,10 @@ "index": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.index.index`, example: `Salmon_index`. Salmon index\n", - "help_text": "Type: `file`, required, default: `$id.$key.index.index`, example: `Salmon_index`. Salmon index\n" + "description": "Type: `file`, required, default: `$id.$key.index`, example: `Salmon_index`. Salmon index\n", + "help_text": "Type: `file`, required, default: `$id.$key.index`, example: `Salmon_index`. 
Salmon index\n" , - "default":"$id.$key.index.index" + "default":"$id.$key.index" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/.config.vsh.yaml index cdcde82..e5eafd4 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/.config.vsh.yaml @@ -1072,6 +1072,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -1172,16 +1175,16 @@ build_info: engine: "docker|native" output: "target/nextflow/salmon/salmon_quant" executable: "target/nextflow/salmon/salmon_quant/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/main.nf index c86bbd5..2634d29 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/main.nf @@ -1,6 +1,6 @@ // salmon_quant main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a 
derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? 
null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3842,6 +4068,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3963,16 +4193,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/salmon/salmon_quant", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -4608,7 +4838,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -4622,6 +4852,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/nextflow_schema.json index 16d210f..2b1c3f3 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/nextflow_schema.json @@ -152,10 +152,10 @@ "output": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `quant_output`. Output quantification directory", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `quant_output`. 
Output quantification directory.\n" + "description": "Type: `file`, required, default: `$id.$key.output`, example: `quant_output`. Output quantification directory", + "help_text": "Type: `file`, required, default: `$id.$key.output`, example: `quant_output`. Output quantification directory.\n" , - "default":"$id.$key.output.output" + "default":"$id.$key.output" } @@ -584,10 +584,10 @@ "aux_dir": { "type": "string", - "description": "Type: `file`, default: `$id.$key.aux_dir.aux_dir`, example: `aux_info`. The sub-directory of the quantification directory where auxiliary information e", - "help_text": "Type: `file`, default: `$id.$key.aux_dir.aux_dir`, example: `aux_info`. The sub-directory of the quantification directory where auxiliary information e.g. bootstraps, bias parameters, etc. will be written.\n" + "description": "Type: `file`, default: `$id.$key.aux_dir`, example: `aux_info`. The sub-directory of the quantification directory where auxiliary information e", + "help_text": "Type: `file`, default: `$id.$key.aux_dir`, example: `aux_info`. The sub-directory of the quantification directory where auxiliary information e.g. bootstraps, bias parameters, etc. 
will be written.\n" , - "default":"$id.$key.aux_dir.aux_dir" + "default":"$id.$key.aux_dir" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat/.config.vsh.yaml index 16adec4..0c863fe 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat/.config.vsh.yaml @@ -66,6 +66,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -172,16 +175,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_flagstat" executable: "target/nextflow/samtools/samtools_flagstat/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat/main.nf index 31df84f..4df049e 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat/main.nf @@ -1,6 +1,6 @@ // samtools_flagstat main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a 
derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2901,6 +3127,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3027,16 +3257,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_flagstat", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3437,7 +3667,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3451,6 +3681,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats/.config.vsh.yaml index ef273e9..4e44998 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats/.config.vsh.yaml @@ -75,6 +75,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -182,16 +185,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_idxstats" executable: "target/nextflow/samtools/samtools_idxstats/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats/main.nf index 2729e99..79091b1 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats/main.nf @@ -1,6 +1,6 @@ // samtools_idxstats main // -// This wrapper 
script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2912,6 +3138,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3039,16 +3269,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_idxstats", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3447,7 +3677,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3461,6 +3691,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_index/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_index/.config.vsh.yaml index a4ffaa2..1103c3f 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_index/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_index/.config.vsh.yaml @@ -84,6 +84,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -188,16 +191,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_index" executable: "target/nextflow/samtools/samtools_index/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_index/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_index/main.nf index 85b710d..a48e6cc 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_index/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_index/main.nf @@ -1,6 +1,6 @@ // samtools_index main // -// This wrapper script is auto-generated by 
viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2928,6 +3154,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3052,16 +3282,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_index", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3471,7 +3701,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3485,6 +3715,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort/.config.vsh.yaml index d341c2f..99e8af4 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort/.config.vsh.yaml @@ -227,6 +227,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -331,16 +334,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_sort" executable: "target/nextflow/samtools/samtools_sort/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort/main.nf index 81aa788..177ac27 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort/main.nf @@ -1,6 +1,6 @@ // samtools_sort main // -// This wrapper script is auto-generated by viash 0.9.0 
and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3100,6 +3326,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3224,16 +3454,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_sort", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3691,7 +3921,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3705,6 +3935,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_stats/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_stats/.config.vsh.yaml index 208326c..166980f 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_stats/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_stats/.config.vsh.yaml @@ -295,6 +295,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -400,16 +403,16 @@ build_info: engine: "docker|native" output: "target/nextflow/samtools/samtools_stats" executable: "target/nextflow/samtools/samtools_stats/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_stats/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_stats/main.nf index 958eac0..6c2f627 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_stats/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_stats/main.nf @@ -1,6 +1,6 @@ // samtools_stats main // -// This wrapper script is auto-generated by 
viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3169,6 +3395,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3294,16 +3524,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/samtools/samtools_stats", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3753,7 +3983,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3767,6 +3997,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/.config.vsh.yaml index fff395c..9ef7fa4 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/.config.vsh.yaml @@ -487,6 +487,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -593,16 +596,16 @@ build_info: engine: "docker|native" output: "target/nextflow/sortmerna" executable: "target/nextflow/sortmerna/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/main.nf index ba848a9..138c7ed 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/main.nf @@ -1,6 +1,6 @@ // sortmerna main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -82,64 +82,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? 
null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -151,10 +143,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -173,7 +168,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +187,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +201,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1664,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1877,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1889,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1901,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1942,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1962,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1993,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2003,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2687,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2845,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2887,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2983,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3333,6 +3559,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3459,16 +3689,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/sortmerna", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -4003,7 +4233,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -4017,6 +4247,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return 
scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/nextflow_schema.json index c0e0df6..f3c2fcd 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/nextflow_schema.json @@ -79,10 +79,10 @@ "output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output.output`. Directory and file prefix for aligned output", - "help_text": "Type: `file`, default: `$id.$key.output.output`. Directory and file prefix for aligned output. 
The appropriate extension: \n(fasta|fastq|blast|sam|etc) is automatically added.\nIf \u0027dir\u0027 is not specified, the output is created in the WORKDIR/out/.\nIf \u0027pfx\u0027 is not specified, the prefix \u0027aligned\u0027 is used.\n" + "description": "Type: `file`, default: `$id.$key.output`. Directory and file prefix for aligned output", + "help_text": "Type: `file`, default: `$id.$key.output`. Directory and file prefix for aligned output. The appropriate extension: \n(fasta|fastq|blast|sam|etc) is automatically added.\nIf \u0027dir\u0027 is not specified, the output is created in the WORKDIR/out/.\nIf \u0027pfx\u0027 is not specified, the prefix \u0027aligned\u0027 is used.\n" , - "default":"$id.$key.output.output" + "default":"$id.$key.output" } @@ -90,10 +90,10 @@ "other": { "type": "string", - "description": "Type: `file`, default: `$id.$key.other.other`. Create Non-aligned reads output file with this path/prefix", - "help_text": "Type: `file`, default: `$id.$key.other.other`. Create Non-aligned reads output file with this path/prefix. Must be used with fastx." + "description": "Type: `file`, default: `$id.$key.other`. Create Non-aligned reads output file with this path/prefix", + "help_text": "Type: `file`, default: `$id.$key.other`. Create Non-aligned reads output file with this path/prefix. Must be used with fastx." 
, - "default":"$id.$key.other.other" + "default":"$id.$key.other" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_align_reads/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_align_reads/.config.vsh.yaml index 86213b4..dd0abf7 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_align_reads/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_align_reads/.config.vsh.yaml @@ -2540,6 +2540,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -2662,16 +2665,16 @@ build_info: engine: "docker|native" output: "target/nextflow/star/star_align_reads" executable: "target/nextflow/star/star_align_reads/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_align_reads/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_align_reads/main.nf index 97daee4..3079420 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_align_reads/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_align_reads/main.nf @@ -1,6 +1,6 @@ // star_align_reads main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is 
thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -86,64 +86,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? 
null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -155,10 +147,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -177,7 +172,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -196,15 +191,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -217,6 +205,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1670,6 +1668,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1727,8 +1881,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1741,7 +1893,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1753,33 +1905,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1810,13 +1946,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1833,7 +1966,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1864,13 +1997,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1878,18 +2007,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2563,7 +2691,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2720,12 +2849,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2738,19 +2891,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2759,23 +2987,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -5794,6 +6020,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -5942,16 +6172,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/star/star_align_reads", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -5982,7 +6212,7 @@ meta = [ // inner workflow hook def innerWorkflowFactory(args) { def rawScript = '''set -e -tempscript=".viash_script.sh" +tempscript=".viash_script.py" cat > "$tempscript" << VIASHMAIN import tempfile import subprocess @@ -6645,7 +6875,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new 
nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -6659,6 +6889,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate/.config.vsh.yaml index 2a12740..b3defa8 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate/.config.vsh.yaml @@ -221,6 +221,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -332,16 +335,16 @@ build_info: engine: "docker|native" output: "target/nextflow/star/star_genome_generate" executable: "target/nextflow/star/star_genome_generate/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate/main.nf index 17e745a..bc93e82 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate/main.nf @@ -1,6 +1,6 @@ // star_genome_generate main // -// This wrapper 
script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3063,6 +3289,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3194,16 +3424,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/star/star_genome_generate", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3292,10 +3522,10 @@ STAR \\\\ \\${par_genome_sa_index_nbases:+--genomeSAindexNbases "\\${par_genome_sa_index_nbases}"} \\\\ \\${par_sjdb_gtf_chr_prefix:+--sjdbGTFchrPrefix "\\${par_sjdb_gtf_chr_prefix}"} \\\\ \\${par_sjdb_gtf_feature_exon:+--sjdbGTFfeatureExon "\\${par_sjdb_gtf_feature_exon}"} \\\\ - \\${par_sjdb_gtf_tag_exon_parent_transcript:+--sjdbGTFtag_exon_parent_transcript "\\${par_sjdb_gtf_tag_exon_parent_transcript}"} \\\\ - 
\\${par_sjdb_gtf_tag_exon_parent_gene:+--sjdbGTFtag_exon_parent_gene "\\${par_sjdb_gtf_tag_exon_parent_gene}"} \\\\ - \\${par_sjdb_gtf_tag_exon_parent_geneName:+--sjdbGTFtag_exon_parent_geneName "\\${par_sjdb_gtf_tag_exon_parent_geneName}"} \\\\ - \\${par_sjdb_gtf_tag_exon_parent_geneType:+--sjdbGTFtag_exon_parent_geneType "\\${sjdbGTFtag_exon_parent_geneType}"} \\\\ + \\${par_sjdb_gtf_tag_exon_parent_transcript:+--sjdbGTFtagExonParentTranscript "\\${par_sjdb_gtf_tag_exon_parent_transcript}"} \\\\ + \\${par_sjdb_gtf_tag_exon_parent_gene:+--sjdbGTFtagExonParentGene "\\${par_sjdb_gtf_tag_exon_parent_gene}"} \\\\ + \\${sjdb_gtf_tag_exon_parent_gene_name:+--sjdbGTFtagExonParentGeneName "\\${sjdb_gtf_tag_exon_parent_gene_name}"} \\\\ + \\${sjdb_gtf_tag_exon_parent_gene_type:+--sjdbGTFtagExonParentGeneType "\\${sjdb_gtf_tag_exon_parent_gene_type}"} \\\\ \\${par_limit_genome_generate_ram:+--limitGenomeGenerateRAM "\\${par_limit_genome_generate_ram}"} \\\\ \\${par_genome_chr_bin_nbits:+--genomeChrBinNbits "\\${par_genome_chr_bin_nbits}"} \\\\ \\${par_genome_sa_sparse_d:+--genomeSAsparseD "\\${par_genome_sa_sparse_d}"} \\\\ @@ -3636,7 +3866,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3650,6 +3880,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = 
Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate/nextflow_schema.json index e8aadf1..27f6172 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate/nextflow_schema.json @@ -187,10 +187,10 @@ "index": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.index.index`. STAR index directory", - "help_text": "Type: `file`, required, default: `$id.$key.index.index`. STAR index directory." + "description": "Type: `file`, required, default: `STAR_index`. STAR index directory", + "help_text": "Type: `file`, required, default: `STAR_index`. STAR index directory." 
, - "default":"$id.$key.index.index" + "default":"STAR_index" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/.config.vsh.yaml index a46f492..92c914d 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/.config.vsh.yaml @@ -668,6 +668,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -769,16 +772,16 @@ build_info: engine: "docker|native" output: "target/nextflow/trimgalore" executable: "target/nextflow/trimgalore/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/main.nf index c46ce51..ed82808 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/main.nf @@ -1,6 +1,6 @@ // trimgalore main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. 
// @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? 
null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3438,6 +3664,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3559,16 +3789,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/trimgalore", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -4138,7 +4368,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -4152,6 +4382,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { 
return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/nextflow_schema.json index 9ee3b48..b022cc4 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/nextflow_schema.json @@ -527,10 +527,10 @@ "output_dir": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output_dir.output_dir`. If specified all output will be written to this directory instead of the current directory", - "help_text": "Type: `file`, required, default: `$id.$key.output_dir.output_dir`. 
If specified all output will be written to this directory instead of the current directory." + "description": "Type: `file`, required, default: `trimmed_output`. If specified all output will be written to this directory instead of the current directory", + "help_text": "Type: `file`, required, default: `trimmed_output`. If specified all output will be written to this directory instead of the current directory." , - "default":"$id.$key.output_dir.output_dir" + "default":"trimmed_output" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml index 6296dff..154533e 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml @@ -509,6 +509,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -610,16 +613,16 @@ build_info: engine: "docker|native" output: "target/nextflow/umi_tools/umi_tools_dedup" executable: "target/nextflow/umi_tools/umi_tools_dedup/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/main.nf 
b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/main.nf index 910e67d..bd43834 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/main.nf @@ -1,6 +1,6 @@ // umi_tools_dedup main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -176,7 +171,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -195,15 +190,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -216,6 +204,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1669,6 +1667,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1726,8 +1880,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1740,7 +1892,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1752,33 +1904,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1809,13 +1945,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1832,7 +1965,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1863,13 +1996,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1877,18 +2006,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2562,7 +2690,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2719,12 +2848,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2737,19 +2890,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2758,23 +2986,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3364,6 +3590,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3486,16 +3716,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/umi_tools/umi_tools_dedup", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -4008,7 +4238,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -4022,6 +4252,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/nextflow_schema.json index 58ecde0..979543b 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/nextflow_schema.json @@ -68,10 +68,10 @@ "output": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. Deduplicated BAM file", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Deduplicated BAM file." 
+ "description": "Type: `file`, required, default: `$id.$key.output`. Deduplicated BAM file", + "help_text": "Type: `file`, required, default: `$id.$key.output`. Deduplicated BAM file." , - "default":"$id.$key.output.output" + "default":"$id.$key.output" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml index dea32d2..8652e6b 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract/.config.vsh.yaml @@ -331,6 +331,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -433,16 +436,16 @@ build_info: engine: "docker|native" output: "target/nextflow/umi_tools/umi_tools_extract" executable: "target/nextflow/umi_tools/umi_tools_extract/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract/main.nf index af503a7..94f4060 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract/main.nf +++ 
b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract/main.nf @@ -1,6 +1,6 @@ // umi_tools_extract main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -82,64 +82,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -151,10 +143,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -173,7 +168,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +187,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +201,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1664,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1877,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1889,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1901,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1942,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1962,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1993,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2003,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2687,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2845,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2887,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2983,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3175,6 +3401,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3298,16 +3528,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/umi_tools/umi_tools_extract", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3809,7 +4039,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3823,6 +4053,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String 
rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract/nextflow_schema.json index 927e039..e8f0501 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract/nextflow_schema.json @@ -67,10 +67,10 @@ "output": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. Output file for read 1", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output file for read 1." 
+ "description": "Type: `file`, required, default: `$id.$key.output`. Output file for read 1", + "help_text": "Type: `file`, required, default: `$id.$key.output`. Output file for read 1." , - "default":"$id.$key.output.output" + "default":"$id.$key.output" } @@ -78,10 +78,10 @@ "read2_out": { "type": "string", - "description": "Type: `file`, default: `$id.$key.read2_out.read2_out`. Output file for read 2", - "help_text": "Type: `file`, default: `$id.$key.read2_out.read2_out`. Output file for read 2." + "description": "Type: `file`, default: `$id.$key.read2_out`. Output file for read 2", + "help_text": "Type: `file`, default: `$id.$key.read2_out`. Output file for read 2." , - "default":"$id.$key.read2_out.read2_out" + "default":"$id.$key.read2_out" } @@ -269,10 +269,10 @@ "log": { "type": "string", - "description": "Type: `file`, default: `$id.$key.log.log`. File with logging information", - "help_text": "Type: `file`, default: `$id.$key.log.log`. File with logging information." + "description": "Type: `file`, default: `$id.$key.log`. File with logging information", + "help_text": "Type: `file`, default: `$id.$key.log`. File with logging information." , - "default":"$id.$key.log.log" + "default":"$id.$key.log" } @@ -301,10 +301,10 @@ "error": { "type": "string", - "description": "Type: `file`, default: `$id.$key.error.error`. File with error information", - "help_text": "Type: `file`, default: `$id.$key.error.error`. File with error information." + "description": "Type: `file`, default: `$id.$key.error`. File with error information", + "help_text": "Type: `file`, default: `$id.$key.error`. File with error information." , - "default":"$id.$key.error.error" + "default":"$id.$key.error" } @@ -332,10 +332,10 @@ "timeit": { "type": "string", - "description": "Type: `file`, default: `$id.$key.timeit.timeit`. Store timing information in file", - "help_text": "Type: `file`, default: `$id.$key.timeit.timeit`. Store timing information in file." 
+ "description": "Type: `file`, default: `$id.$key.timeit`. Store timing information in file", + "help_text": "Type: `file`, default: `$id.$key.timeit`. Store timing information in file." , - "default":"$id.$key.timeit.timeit" + "default":"$id.$key.timeit" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml index 30bbe4a..5beb60f 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml @@ -153,6 +153,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -255,16 +258,16 @@ build_info: engine: "docker|native" output: "target/nextflow/umi_tools/umi_tools_prepareforrsem" executable: "target/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf" - viash_version: "0.9.0" - git_commit: "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b" - git_remote: "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox" - git_tag: "v0.2.0-27-g952ff08" + viash_version: "0.9.4" + git_commit: "c733de2e4cc21eccb241060f82bce0b332f79d81" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-32-gc733de2" package_config: name: "biobox" version: "main" description: "A collection of bioinformatics tools for working with sequence data.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf index df5b652..d471131 100644 --- 
a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf @@ -1,6 +1,6 @@ // umi_tools_prepareforrsem main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -82,64 +82,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? 
null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -151,10 +143,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -173,7 +168,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +187,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +201,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1664,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1877,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1889,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1901,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1942,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1962,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1993,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2003,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2687,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2845,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2887,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2983,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2997,6 +3223,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3120,16 +3350,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/umi_tools/umi_tools_prepareforrsem", - "viash_version" : "0.9.0", - "git_commit" : "952ff0843093b538cbfd6fefdecf2e7a0bc9e70b", - "git_remote" : "https://x-access-token:ghs_EwAUAMYJ0K4VBHlAEMs4ZP2OyQYqJM0PSfEO@github.com/viash-hub/biobox", - "git_tag" : "v0.2.0-27-g952ff08" + "viash_version" : "0.9.4", + "git_commit" : "c733de2e4cc21eccb241060f82bce0b332f79d81", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-32-gc733de2" }, "package_config" : { "name" : "biobox", "version" : "main", "description" : "A collection of bioinformatics tools for working with sequence data.\n", - "viash_version" : "0.9.0", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3562,7 +3792,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3576,6 +3806,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, 
String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow_schema.json index d7e1cd7..98b77e2 100644 --- a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow_schema.json @@ -48,10 +48,10 @@ "log": { "type": "string", - "description": "Type: `file`, default: `$id.$key.log.log`. File with logging information [default = stdout]", - "help_text": "Type: `file`, default: `$id.$key.log.log`. 
File with logging information [default = stdout]." + "description": "Type: `file`, default: `$id.$key.log`. File with logging information [default = stdout]", + "help_text": "Type: `file`, default: `$id.$key.log`. File with logging information [default = stdout]." , - "default":"$id.$key.log.log" + "default":"$id.$key.log" } @@ -59,10 +59,10 @@ "error": { "type": "string", - "description": "Type: `file`, default: `$id.$key.error.error`. File with error information [default = stderr]", - "help_text": "Type: `file`, default: `$id.$key.error.error`. File with error information [default = stderr]." + "description": "Type: `file`, default: `$id.$key.error`. File with error information [default = stderr]", + "help_text": "Type: `file`, default: `$id.$key.error`. File with error information [default = stderr]." , - "default":"$id.$key.error.error" + "default":"$id.$key.error" } diff --git a/target/executable/bedtools_genomecov/.config.vsh.yaml b/target/executable/bedtools_genomecov/.config.vsh.yaml index 70686df..950c50b 100644 --- a/target/executable/bedtools_genomecov/.config.vsh.yaml +++ b/target/executable/bedtools_genomecov/.config.vsh.yaml @@ -80,6 +80,9 @@ info: - "modules/local/bedtools_genomecov.nf" last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -182,15 +185,15 @@ build_info: engine: "docker|native" output: "target/executable/bedtools_genomecov" executable: "target/executable/bedtools_genomecov/bedtools_genomecov" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: 
"gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -201,7 +204,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/bedtools_genomecov/bedtools_genomecov b/target/executable/bedtools_genomecov/bedtools_genomecov index 7a0f9d3..80d363d 100755 --- a/target/executable/bedtools_genomecov/bedtools_genomecov +++ b/target/executable/bedtools_genomecov/bedtools_genomecov @@ -2,7 +2,7 @@ # bedtools_genomecov main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,35 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bedtools_genomecov main" - echo "" - echo "Compute BEDGRAPH (-bg) summaries of feature coverage" - echo "" - echo "Input:" - echo " --strandedness" - echo " type: string" - echo " choices: [ unstranded, forward, reverse, auto ]" - echo " Sample strand-specificity." 
- echo "" - echo " --bam" - echo " type: file, file must exist" - echo " Genome BAM file" - echo "" - echo " --extra_bedtools_args" - echo " type: string" - echo " default:" - echo "" - echo "Output:" - echo " --bedgraph_forward" - echo " type: file, output, file must exist" - echo " default: \$id.forward.bedgraph" - echo "" - echo " --bedgraph_reverse" - echo " type: file, output, file must exist" - echo " default: \$id.reverse.bedgraph" -} # initialise variables VIASH_MODE='run' @@ -481,9 +452,9 @@ mv bedtools.static /usr/local/bin/bedtools && \ chmod a+x /usr/local/bin/bedtools LABEL org.opencontainers.image.description="Companion container for running component bedtools_genomecov" -LABEL org.opencontainers.image.created="2024-12-05T14:40:12Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:33Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -598,6 +569,61 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedtools_genomecov main" + echo "" + echo "Compute BEDGRAPH (-bg) summaries of feature coverage" + echo "" + echo "Input:" + echo " --strandedness" + echo " type: string" + echo " choices: [ unstranded, forward, reverse, auto ]" + echo " Sample strand-specificity." 
+ echo "" + echo " --bam" + echo " type: file, file must exist" + echo " Genome BAM file" + echo "" + echo " --extra_bedtools_args" + echo " type: string" + echo " default:" + echo "" + echo "Output:" + echo " --bedgraph_forward" + echo " type: file, output, file must exist" + echo " default: \$id.forward.bedgraph" + echo "" + echo " --bedgraph_reverse" + echo " type: file, output, file must exist" + echo " default: \$id.reverse.bedgraph" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/cat_additional_fasta/.config.vsh.yaml b/target/executable/cat_additional_fasta/.config.vsh.yaml index 7c8f832..b349f7e 100644 --- a/target/executable/cat_additional_fasta/.config.vsh.yaml +++ b/target/executable/cat_additional_fasta/.config.vsh.yaml @@ -90,6 +90,9 @@ info: - "modules/local/cat_additional_fasta.nf" last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -186,15 +189,15 @@ build_info: engine: "docker|native" output: "target/executable/cat_additional_fasta" executable: "target/executable/cat_additional_fasta/cat_additional_fasta" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -205,7 +208,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/cat_additional_fasta/cat_additional_fasta b/target/executable/cat_additional_fasta/cat_additional_fasta index 312322f..4332f6b 100755 --- a/target/executable/cat_additional_fasta/cat_additional_fasta +++ b/target/executable/cat_additional_fasta/cat_additional_fasta @@ -2,7 +2,7 @@ # cat_additional_fasta main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,40 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "cat_additional_fasta main" - echo "" - echo "Concatenate addional fasta file to reference FASTA and GTF files." - echo "" - echo "Input:" - echo " --fasta" - echo " type: file, required parameter, file must exist" - echo " Path to FASTA genome file." - echo "" - echo " --gtf" - echo " type: file, file must exist" - echo " Path to GTF annotation file." - echo "" - echo " --additional_fasta" - echo " type: file, file must exist" - echo " FASTA file to concatenate to genome FASTA file e.g. containing spike-in" - echo " sequences." - echo "" - echo " --biotype" - echo " type: string" - echo " Biotype value to use when appending entries to GTF file when additional" - echo " fasta file is provided." - echo "" - echo "Output:" - echo " --fasta_output" - echo " type: file, output, file must exist" - echo " Concatenated FASTA file." - echo "" - echo " --gtf_output" - echo " type: file, output, file must exist" - echo " Concatenated GTF file." 
-} # initialise variables VIASH_MODE='run' @@ -480,9 +446,9 @@ function ViashDockerfile { FROM python:latest ENTRYPOINT [] LABEL org.opencontainers.image.description="Companion container for running component cat_additional_fasta" -LABEL org.opencontainers.image.created="2024-12-05T14:40:11Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:34Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -597,6 +563,66 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "cat_additional_fasta main" + echo "" + echo "Concatenate addional fasta file to reference FASTA and GTF files." + echo "" + echo "Input:" + echo " --fasta" + echo " type: file, required parameter, file must exist" + echo " Path to FASTA genome file." + echo "" + echo " --gtf" + echo " type: file, file must exist" + echo " Path to GTF annotation file." + echo "" + echo " --additional_fasta" + echo " type: file, file must exist" + echo " FASTA file to concatenate to genome FASTA file e.g. containing spike-in" + echo " sequences." + echo "" + echo " --biotype" + echo " type: string" + echo " Biotype value to use when appending entries to GTF file when additional" + echo " fasta file is provided." + echo "" + echo "Output:" + echo " --fasta_output" + echo " type: file, output, file must exist" + echo " Concatenated FASTA file." + echo "" + echo " --gtf_output" + echo " type: file, output, file must exist" + echo " Concatenated GTF file." 
+ echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/cat_fastq/.config.vsh.yaml b/target/executable/cat_fastq/.config.vsh.yaml index e28ea1e..b2e9f4f 100644 --- a/target/executable/cat_fastq/.config.vsh.yaml +++ b/target/executable/cat_fastq/.config.vsh.yaml @@ -77,6 +77,9 @@ info: - "modules/nf-core/cat/fastq/meta.yml" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -173,15 +176,15 @@ build_info: engine: "docker|native" output: "target/executable/cat_fastq" executable: "target/executable/cat_fastq/cat_fastq" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -192,7 +195,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/cat_fastq/cat_fastq b/target/executable/cat_fastq/cat_fastq index 87aee80..bc976b8 100755 --- a/target/executable/cat_fastq/cat_fastq +++ b/target/executable/cat_fastq/cat_fastq @@ -2,7 +2,7 @@ # cat_fastq main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -169,32 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "cat_fastq main" - echo "" - echo "Concatenate multiple fastq files" - echo "" - echo "Input:" - echo " --read_1" - echo " type: file, multiple values allowed, file must exist" - echo " Read 1 fastq files to be concatenated" - echo "" - echo " --read_2" - echo " type: file, multiple values allowed, file must exist" - echo " Read 2 fastq files to be concatenated" - echo "" - echo "Output:" - echo " --fastq_1" - echo " type: file, output, file must exist" - echo " default: \$id_r1.fastq" - echo " Concatenated read 1 fastq" - echo "" - echo " --fastq_2" - echo " type: file, output" - echo " default: \$id_r2.fastq" - echo " Concatenated read 2 fastq" -} # initialise variables VIASH_MODE='run' @@ -472,9 +446,9 @@ function ViashDockerfile { FROM ubuntu:22.04 ENTRYPOINT [] LABEL org.opencontainers.image.description="Companion container for running component cat_fastq" -LABEL org.opencontainers.image.created="2024-12-05T14:40:10Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:31Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -589,6 +563,58 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "cat_fastq main" + echo "" + echo "Concatenate multiple fastq files" + echo "" + echo "Input:" + echo " --read_1" + echo " type: file, multiple values allowed, file must 
exist" + echo " Read 1 fastq files to be concatenated" + echo "" + echo " --read_2" + echo " type: file, multiple values allowed, file must exist" + echo " Read 2 fastq files to be concatenated" + echo "" + echo "Output:" + echo " --fastq_1" + echo " type: file, output, file must exist" + echo " default: \$id_r1.fastq" + echo " Concatenated read 1 fastq" + echo "" + echo " --fastq_2" + echo " type: file, output" + echo " default: \$id_r2.fastq" + echo " Concatenated read 2 fastq" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/copy_if_exists/.config.vsh.yaml b/target/executable/copy_if_exists/.config.vsh.yaml new file mode 100644 index 0000000..c02c648 --- /dev/null +++ b/target/executable/copy_if_exists/.config.vsh.yaml @@ -0,0 +1,183 @@ +name: "copy_if_exists" +version: "main" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--required_file" + info: null + example: + - "/tmp/rnaseq_workflow_config/required_file.txt" + must_exist: false + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--optional_file" + info: null + example: + - "/tmp/rnaseq_workflow_config/optional_file.txt" + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Ouput" + arguments: + - type: "file" + name: "--output" + info: null + default: + - "copy_if_exists_output" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "required_file.txt" +- type: "file" + path: "optional_file.txt" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "main" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 
5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "main" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/extra/copy_if_exists/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + 
output: "target/executable/copy_if_exists" + executable: "target/executable/copy_if_exists/copy_if_exists" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "main" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "main" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.2" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'main'" + organization: "vsh" diff --git a/target/executable/copy_if_exists/copy_if_exists b/target/executable/copy_if_exists/copy_if_exists new file mode 100755 index 0000000..4b60a55 --- /dev/null +++ b/target/executable/copy_if_exists/copy_if_exists @@ -0,0 +1,1136 @@ +#!/usr/bin/env bash + +# copy_if_exists main +# +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+ +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. 
+function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="copy_if_exists" +VIASH_META_FUNCTIONALITY_NAME="copy_if_exists" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
+ fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." 
+ else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? 
: whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 +ENTRYPOINT [] +LABEL org.opencontainers.image.description="Companion container for running component copy_if_exists" +LABEL org.opencontainers.image.created="2025-05-06T19:18:33Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL 
org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" +LABEL org.opencontainers.image.version="main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables 
+VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "copy_if_exists main" + echo "" + echo "Input:" + echo " --required_file" + echo " type: file, required parameter" + echo " example: /tmp/rnaseq_workflow_config/required_file.txt" + echo "" + echo " --optional_file" + echo " type: file" + echo " example: /tmp/rnaseq_workflow_config/optional_file.txt" + echo "" + echo "Ouput:" + echo " --output" + echo " type: file, output, file must exist" + echo " default: copy_if_exists_output" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "copy_if_exists main" + exit + ;; + --required_file) + [ -n "$VIASH_PAR_REQUIRED_FILE" ] && ViashError Bad arguments for option \'--required_file\': \'$VIASH_PAR_REQUIRED_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REQUIRED_FILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --required_file. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --required_file=*) + [ -n "$VIASH_PAR_REQUIRED_FILE" ] && ViashError Bad arguments for option \'--required_file=*\': \'$VIASH_PAR_REQUIRED_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REQUIRED_FILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --optional_file) + [ -n "$VIASH_PAR_OPTIONAL_FILE" ] && ViashError Bad arguments for option \'--optional_file\': \'$VIASH_PAR_OPTIONAL_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OPTIONAL_FILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --optional_file. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --optional_file=*) + [ -n "$VIASH_PAR_OPTIONAL_FILE" ] && ViashError Bad arguments for option \'--optional_file=*\': \'$VIASH_PAR_OPTIONAL_FILE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OPTIONAL_FILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." 
+ exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/copy_if_exists:main' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) 
memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_REQUIRED_FILE+x} ]; then + ViashError '--required_file' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="copy_if_exists_output" +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! 
-z "$VIASH_PAR_REQUIRED_FILE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_REQUIRED_FILE")" ) + VIASH_PAR_REQUIRED_FILE=$(ViashDockerAutodetectMount "$VIASH_PAR_REQUIRED_FILE") +fi +if [ ! -z "$VIASH_PAR_OPTIONAL_FILE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OPTIONAL_FILE")" ) + VIASH_PAR_OPTIONAL_FILE=$(ViashDockerAutodetectMount "$VIASH_PAR_OPTIONAL_FILE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-copy_if_exists-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_REQUIRED_FILE+x} ]; then echo "${VIASH_PAR_REQUIRED_FILE}" | sed "s#'#'\"'\"'#g;s#.*#par_required_file='&'#" ; else echo "# par_required_file="; fi ) +$( if [ ! 
-z ${VIASH_PAR_OPTIONAL_FILE+x} ]; then echo "${VIASH_PAR_OPTIONAL_FILE}" | sed "s#'#'\"'\"'#g;s#.*#par_optional_file='&'#" ; else echo "# par_optional_file="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +mkdir -p \$par_output + +# This file is checked by the Nextflow module wrapper +cp \$par_required_file "\$par_output" + +# If the variable is empty, we use the default one (registered as a resource) +if [ -z \$par_optional_file ]; then + echo "No optional_file provided, using the default" + cp \$meta_resources_dir/optional_file.txt "\$par_output" +else + echo "Optional file provided" + if [ -f \$par_optional_file ]; then + cp \$par_optional_file "\$par_output" + else + # Unreachable: the Viash-generated module checks this + echo "Optional file does not exist" + exit 1 + fi +fi + +echo "Done" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_REQUIRED_FILE" ]; then + VIASH_PAR_REQUIRED_FILE=$(ViashDockerStripAutomount "$VIASH_PAR_REQUIRED_FILE") + fi + if [ ! -z "$VIASH_PAR_OPTIONAL_FILE" ]; then + VIASH_PAR_OPTIONAL_FILE=$(ViashDockerStripAutomount "$VIASH_PAR_OPTIONAL_FILE") + fi + if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/copy_if_exists/nextflow_labels.config b/target/executable/copy_if_exists/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/executable/copy_if_exists/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" 
+ System.exit(1) + } +} + diff --git a/target/executable/copy_if_exists/optional_file.txt b/target/executable/copy_if_exists/optional_file.txt new file mode 100644 index 0000000..6462a73 --- /dev/null +++ b/target/executable/copy_if_exists/optional_file.txt @@ -0,0 +1 @@ +Optional! diff --git a/target/executable/copy_if_exists/required_file.txt b/target/executable/copy_if_exists/required_file.txt new file mode 100644 index 0000000..b4dbbb5 --- /dev/null +++ b/target/executable/copy_if_exists/required_file.txt @@ -0,0 +1 @@ +Required! diff --git a/target/executable/deseq2_qc/.config.vsh.yaml b/target/executable/deseq2_qc/.config.vsh.yaml index 1820320..f3d11fc 100644 --- a/target/executable/deseq2_qc/.config.vsh.yaml +++ b/target/executable/deseq2_qc/.config.vsh.yaml @@ -133,6 +133,9 @@ info: - "modules/local/deseq2_qc.nf" last_sha: "92b2a7857de1dda9d1c19a088941fc81e2976ff7" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -215,11 +218,20 @@ runners: engines: - type: "docker" id: "docker" - image: "rocker/r2u:22.04" + image: "debian:latest" target_registry: "images.viash-hub.com" target_tag: "main" namespace_separator: "/" setup: + - type: "apt" + packages: + - "libcurl4-openssl-dev" + - "r-base" + - "r-base-core" + - "libxml2-dev" + - "procps" + - "libssl-dev" + interactive: false - type: "r" cran: - "optparse" @@ -231,6 +243,7 @@ engines: bioc: - "DESeq2" bioc_force_install: false + warnings_as_errors: true entrypoint: [] cmd: null - type: "native" @@ -241,15 +254,15 @@ build_info: engine: "docker|native" output: "target/executable/deseq2_qc" executable: "target/executable/deseq2_qc/deseq2_qc" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" 
package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -260,7 +273,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/deseq2_qc/deseq2_qc b/target/executable/deseq2_qc/deseq2_qc index a0563d1..f9bbbf8 100755 --- a/target/executable/deseq2_qc/deseq2_qc +++ b/target/executable/deseq2_qc/deseq2_qc @@ -2,7 +2,7 @@ # deseq2_qc main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,61 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "deseq2_qc main" - echo "" - echo "run deseq2, perform pca, generate heatmaps and scatterplots for samples in the" - echo "counts files" - echo "" - echo "input:" - echo " --counts" - echo " type: file, required parameter, file must exist" - echo " Count file matrix where rows are genes and columns are samples." - echo "" - echo " --vst" - echo " type: boolean" - echo " default: false" - echo " Use vst transformation instead of rlog with .DESeq2" - echo "" - echo " --count_col" - echo " type: integer" - echo " default: 3" - echo " First column containing sample count data." - echo "" - echo " --id_col" - echo " type: integer" - echo " default: 1" - echo " Column containing identifiers to be used." - echo "" - echo " --sample_suffix" - echo " type: string" - echo " default:" - echo " Suffix to remove after sample name in columns e.g. '.rmDup.bam' if" - echo " 'DRUG_R1.rmDup.bam'." 
- echo "" - echo " --outprefix" - echo " type: string" - echo " default: deseq2" - echo " Output prefix" - echo "" - echo " --label" - echo " type: string" - echo " Label to used in MultiQC report" - echo "" - echo "Output:" - echo " --outdir" - echo " type: file, output, file must exist" - echo " default: deseq2" - echo "" - echo " --pca_multiqc" - echo " type: file, output, file must exist" - echo " default: deseq2.pca.vals_mqc.tsv" - echo "" - echo " --sample_dists_multiqc" - echo " type: file, output, file must exist" - echo " default: deseq2.sample.dists_mqc.tsv" -} # initialise variables VIASH_MODE='run' @@ -498,17 +443,21 @@ function ViashDockerfile { if [[ "$engine_id" == "docker" ]]; then cat << 'VIASHDOCKER' -FROM rocker/r2u:22.04 +FROM debian:latest ENTRYPOINT [] -RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \ - Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \ - Rscript -e 'if (!requireNamespace("DESeq2", quietly = TRUE)) BiocManager::install("DESeq2")' && \ - Rscript -e 'remotes::install_cran(c("optparse", "ggplot2", "RColorBrewer", "pheatmap", "stringr", "matrixStats"), repos = "https://cran.rstudio.com")' +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y libcurl4-openssl-dev r-base r-base-core libxml2-dev procps libssl-dev && \ + rm -rf /var/lib/apt/lists/* + +RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("DESeq2", quietly = TRUE)) BiocManager::install("DESeq2")' && \ + Rscript -e 'options(warn = 2); remotes::install_cran(c("optparse", "ggplot2", "RColorBrewer", "pheatmap", "stringr", "matrixStats"), repos = "https://cran.rstudio.com")' LABEL 
org.opencontainers.image.description="Companion container for running component deseq2_qc" -LABEL org.opencontainers.image.created="2024-12-05T14:40:10Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:29Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -623,6 +572,87 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "deseq2_qc main" + echo "" + echo "run deseq2, perform pca, generate heatmaps and scatterplots for samples in the" + echo "counts files" + echo "" + echo "input:" + echo " --counts" + echo " type: file, required parameter, file must exist" + echo " Count file matrix where rows are genes and columns are samples." + echo "" + echo " --vst" + echo " type: boolean" + echo " default: false" + echo " Use vst transformation instead of rlog with .DESeq2" + echo "" + echo " --count_col" + echo " type: integer" + echo " default: 3" + echo " First column containing sample count data." + echo "" + echo " --id_col" + echo " type: integer" + echo " default: 1" + echo " Column containing identifiers to be used." + echo "" + echo " --sample_suffix" + echo " type: string" + echo " default:" + echo " Suffix to remove after sample name in columns e.g. '.rmDup.bam' if" + echo " 'DRUG_R1.rmDup.bam'." 
+ echo "" + echo " --outprefix" + echo " type: string" + echo " default: deseq2" + echo " Output prefix" + echo "" + echo " --label" + echo " type: string" + echo " Label to used in MultiQC report" + echo "" + echo "Output:" + echo " --outdir" + echo " type: file, output, file must exist" + echo " default: deseq2" + echo "" + echo " --pca_multiqc" + echo " type: file, output, file must exist" + echo " default: deseq2.pca.vals_mqc.tsv" + echo "" + echo " --sample_dists_multiqc" + echo " type: file, output, file must exist" + echo " default: deseq2.sample.dists_mqc.tsv" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' @@ -1325,7 +1355,7 @@ saveRDS(dds, file = sub("\\\\.dds\\\\.RData\$", ".rds", DDSFile)) ##' @author Gavin Kelly plotPCA_vst <- function(object, ntop = 500, assay = length(assays(object))) { - rv <- rowVars(assay(object, assay)) + rv <- rowVars(assay(object, assay), useNames = TRUE) select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))] pca <- prcomp(t(assay(object, assay)[select, ]), center = TRUE, scale = FALSE) percentVar <- pca\$sdev^2 / sum(pca\$sdev^2) diff --git a/target/executable/dupradar/.config.vsh.yaml b/target/executable/dupradar/.config.vsh.yaml index c6ce725..89f2d4a 100644 --- a/target/executable/dupradar/.config.vsh.yaml +++ b/target/executable/dupradar/.config.vsh.yaml @@ -165,6 +165,9 @@ info: - "modules/local/dupradar.nf" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -260,6 +263,7 @@ engines: bioc: - "dupRadar" bioc_force_install: false + warnings_as_errors: true entrypoint: [] cmd: null - type: "native" @@ -270,15 +274,15 @@ build_info: engine: "docker|native" output: "target/executable/dupradar" executable: "target/executable/dupradar/dupradar" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -289,7 +293,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git 
a/target/executable/dupradar/dupradar b/target/executable/dupradar/dupradar index 20f5ea0..653672a 100755 --- a/target/executable/dupradar/dupradar +++ b/target/executable/dupradar/dupradar @@ -2,7 +2,7 @@ # dupradar main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,73 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "dupradar main" - echo "" - echo "Assessment of duplication rates in RNA-Seq datasets" - echo "" - echo "Input:" - echo " --id" - echo " type: string" - echo " Sample ID" - echo "" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " path to input alignment file in BAM format" - echo "" - echo " --gtf_annotation" - echo " type: file, required parameter, file must exist" - echo " path to GTF annotation file." 
- echo "" - echo " --paired" - echo " type: boolean" - echo " add flag if input alignment file consists of paired reads" - echo "" - echo " --strandedness" - echo " type: string" - echo " choices: [ forward, reverse, unstranded ]" - echo " strandedness of input bam file reads (forward, reverse or unstranded" - echo " (default, applicable to paired reads))" - echo "" - echo "Output:" - echo " --output_dupmatrix" - echo " type: file, output, file must exist" - echo " default: \$id.dup_matrix.txt" - echo " path to output file (txt) of duplicate tag counts" - echo "" - echo " --output_dup_intercept_mqc" - echo " type: file, output, file must exist" - echo " default: \$id.dup_intercept_mqc.txt" - echo " path to output file (txt) of multiqc intercept value DupRadar" - echo "" - echo " --output_duprate_exp_boxplot" - echo " type: file, output, file must exist" - echo " default: \$id.duprate_exp_boxplot.pdf" - echo " path to output file (pdf) of distribution of expression box plot" - echo "" - echo " --output_duprate_exp_densplot" - echo " type: file, output, file must exist" - echo " default: \$id.duprate_exp_densityplot.pdf" - echo " path to output file (pdf) of 2D density scatter plot of duplicate tag" - echo " counts" - echo "" - echo " --output_duprate_exp_denscurve_mqc" - echo " type: file, output, file must exist" - echo " default: \$id.duprate_exp_density_curve_mqc.txt" - echo " path to output file (pdf) of density curve of gene duplication multiqc" - echo "" - echo " --output_expression_histogram" - echo " type: file, output, file must exist" - echo " default: \$id.expression_hist.pdf" - echo " path to output file (pdf) of distribution of RPK values per gene" - echo " histogram" - echo "" - echo " --output_intercept_slope" - echo " type: file, output, file must exist" - echo " default: \$id.intercept_slope.txt" - echo " output file (txt) with progression of duplication rate value" -} # initialise variables VIASH_MODE='run' @@ -516,13 +449,13 @@ RUN apt-get update 
&& \ DEBIAN_FRONTEND=noninteractive apt-get install -y r-base && \ rm -rf /var/lib/apt/lists/* -RUN Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \ - Rscript -e 'if (!requireNamespace("dupRadar", quietly = TRUE)) BiocManager::install("dupRadar")' +RUN Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("dupRadar", quietly = TRUE)) BiocManager::install("dupRadar")' LABEL org.opencontainers.image.description="Companion container for running component dupradar" -LABEL org.opencontainers.image.created="2024-12-05T14:40:12Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:30Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -637,6 +570,99 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "dupradar main" + echo "" + echo "Assessment of duplication rates in RNA-Seq datasets" + echo "" + echo "Input:" + echo " --id" + echo " type: string" + echo " Sample ID" + echo "" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " path to input alignment file in BAM format" + echo "" + echo " --gtf_annotation" + echo " type: file, required parameter, file must exist" + echo " path to GTF annotation file." 
+ echo "" + echo " --paired" + echo " type: boolean" + echo " add flag if input alignment file consists of paired reads" + echo "" + echo " --strandedness" + echo " type: string" + echo " choices: [ forward, reverse, unstranded ]" + echo " strandedness of input bam file reads (forward, reverse or unstranded" + echo " (default, applicable to paired reads))" + echo "" + echo "Output:" + echo " --output_dupmatrix" + echo " type: file, output, file must exist" + echo " default: \$id.dup_matrix.txt" + echo " path to output file (txt) of duplicate tag counts" + echo "" + echo " --output_dup_intercept_mqc" + echo " type: file, output, file must exist" + echo " default: \$id.dup_intercept_mqc.txt" + echo " path to output file (txt) of multiqc intercept value DupRadar" + echo "" + echo " --output_duprate_exp_boxplot" + echo " type: file, output, file must exist" + echo " default: \$id.duprate_exp_boxplot.pdf" + echo " path to output file (pdf) of distribution of expression box plot" + echo "" + echo " --output_duprate_exp_densplot" + echo " type: file, output, file must exist" + echo " default: \$id.duprate_exp_densityplot.pdf" + echo " path to output file (pdf) of 2D density scatter plot of duplicate tag" + echo " counts" + echo "" + echo " --output_duprate_exp_denscurve_mqc" + echo " type: file, output, file must exist" + echo " default: \$id.duprate_exp_density_curve_mqc.txt" + echo " path to output file (pdf) of density curve of gene duplication multiqc" + echo "" + echo " --output_expression_histogram" + echo " type: file, output, file must exist" + echo " default: \$id.expression_hist.pdf" + echo " path to output file (pdf) of distribution of RPK values per gene" + echo " histogram" + echo "" + echo " --output_intercept_slope" + echo " type: file, output, file must exist" + echo " default: \$id.intercept_slope.txt" + echo " output file (txt) with progression of duplication rate value" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" 
+ echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/getchromsizes/.config.vsh.yaml b/target/executable/getchromsizes/.config.vsh.yaml index f1b64e0..6a2ca8b 100644 --- a/target/executable/getchromsizes/.config.vsh.yaml +++ b/target/executable/getchromsizes/.config.vsh.yaml @@ -67,6 +67,9 @@ info: - "modules/nf-core/custom/getchromsizes/meta.yml" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -171,15 +174,15 @@ build_info: engine: "docker|native" output: "target/executable/getchromsizes" executable: "target/executable/getchromsizes/getchromsizes" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -190,7 +193,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/getchromsizes/getchromsizes b/target/executable/getchromsizes/getchromsizes index b03096d..88f3628 100755 --- a/target/executable/getchromsizes/getchromsizes +++ b/target/executable/getchromsizes/getchromsizes @@ -2,7 +2,7 @@ # getchromsizes main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -169,30 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "getchromsizes main" - echo "" - echo "Generates a FASTA file of chromosome sizes and a fasta index file." - echo "" - echo "Input:" - echo " --fasta" - echo " type: file, file must exist" - echo " Genome fasta files" - echo "" - echo "Output:" - echo " --sizes" - echo " type: file, output, file must exist" - echo " File containing chromosome lengths" - echo "" - echo " --fai" - echo " type: file, output, file must exist" - echo " FASTA index file" - echo "" - echo " --gzi" - echo " type: file, output, file must exist" - echo " Optional gzip index file for compressed inputs" -} # initialise variables VIASH_MODE='run' @@ -480,9 +456,9 @@ make && \ make install LABEL org.opencontainers.image.description="Companion container for running component getchromsizes" -LABEL org.opencontainers.image.created="2024-12-05T14:40:11Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:33Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -597,6 +573,56 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "getchromsizes main" + echo "" + echo "Generates a FASTA file of chromosome sizes and a fasta index file." 
+ echo "" + echo "Input:" + echo " --fasta" + echo " type: file, file must exist" + echo " Genome fasta files" + echo "" + echo "Output:" + echo " --sizes" + echo " type: file, output, file must exist" + echo " File containing chromosome lengths" + echo "" + echo " --fai" + echo " type: file, output, file must exist" + echo " FASTA index file" + echo "" + echo " --gzi" + echo " type: file, output, file must exist" + echo " Optional gzip index file for compressed inputs" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/gtf2bed/.config.vsh.yaml b/target/executable/gtf2bed/.config.vsh.yaml index da6351b..c9eb055 100644 --- a/target/executable/gtf2bed/.config.vsh.yaml +++ b/target/executable/gtf2bed/.config.vsh.yaml @@ -48,6 +48,9 @@ info: - "modules/local/gtf2bed.nf" last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -149,15 +152,15 @@ build_info: engine: "docker|native" output: "target/executable/gtf2bed" executable: "target/executable/gtf2bed/gtf2bed" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -168,7 +171,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/gtf2bed/gtf2bed b/target/executable/gtf2bed/gtf2bed index c466e51..5104db8 100755 --- a/target/executable/gtf2bed/gtf2bed +++ b/target/executable/gtf2bed/gtf2bed @@ -2,7 +2,7 @@ # gtf2bed main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -169,22 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "gtf2bed main" - echo "" - echo "Create BED annotation file from GTF." - echo "" - echo "Input:" - echo " --gtf" - echo " type: file, required parameter, file must exist" - echo " A reference file in GTF format." - echo "" - echo " Output:" - echo " --bed_output" - echo " type: file, required parameter, output, file must exist" - echo " BED file resulting from the conversion of the GTF input file." -} # initialise variables VIASH_MODE='run' @@ -466,9 +450,9 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* LABEL org.opencontainers.image.description="Companion container for running component gtf2bed" -LABEL org.opencontainers.image.created="2024-12-05T14:40:13Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:33Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -583,6 +567,48 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "gtf2bed main" + echo "" + echo "Create BED annotation file from GTF." + echo "" + echo "Input:" + echo " --gtf" + echo " type: file, required parameter, file must exist" + echo " A reference file in GTF format." + echo "" + echo " Output:" + echo " --bed_output" + echo " type: file, required parameter, output, file must exist" + echo " BED file resulting from the conversion of the GTF input file." 
+ echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/gtf_filter/.config.vsh.yaml b/target/executable/gtf_filter/.config.vsh.yaml index 13fc9cf..0b34835 100644 --- a/target/executable/gtf_filter/.config.vsh.yaml +++ b/target/executable/gtf_filter/.config.vsh.yaml @@ -63,6 +63,9 @@ info: - "modules/local/gtf_filter.nf" last_sha: "1c6012ecbb087014ea4b8f0f3d39b874850277a8" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -159,15 +162,15 @@ build_info: engine: "docker|native" output: "target/executable/gtf_filter" executable: "target/executable/gtf_filter/gtf_filter" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -178,7 +181,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/gtf_filter/gtf_filter b/target/executable/gtf_filter/gtf_filter index b54ee7d..fec7074 100755 --- a/target/executable/gtf_filter/gtf_filter +++ b/target/executable/gtf_filter/gtf_filter @@ -2,7 +2,7 @@ # gtf_filter main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -169,30 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "gtf_filter main" - echo "" - echo "Filters a GTF file based on sequence names in a FASTA file." - echo "" - echo "Input:" - echo " --fasta" - echo " type: file, file must exist" - echo " Genome fasta file" - echo "" - echo " --gtf" - echo " type: file, file must exist" - echo " GTF file" - echo "" - echo " --skip_transcript_id_check" - echo " type: boolean_true" - echo " Skip checking for transcript IDs in the GTF file." - echo "" - echo " Output:" - echo " --filtered_gtf" - echo " type: file, output, file must exist" - echo " Filtered GTF file containing only sequences in the FASTA file" -} # initialise variables VIASH_MODE='run' @@ -470,9 +446,9 @@ function ViashDockerfile { FROM python:latest ENTRYPOINT [] LABEL org.opencontainers.image.description="Companion container for running component gtf_filter" -LABEL org.opencontainers.image.created="2024-12-05T14:40:11Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:29Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -587,6 +563,56 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "gtf_filter main" + echo "" + echo "Filters a GTF file based on sequence names in a FASTA file." 
+ echo "" + echo "Input:" + echo " --fasta" + echo " type: file, file must exist" + echo " Genome fasta file" + echo "" + echo " --gtf" + echo " type: file, file must exist" + echo " GTF file" + echo "" + echo " --skip_transcript_id_check" + echo " type: boolean_true" + echo " Skip checking for transcript IDs in the GTF file." + echo "" + echo " Output:" + echo " --filtered_gtf" + echo " type: file, output, file must exist" + echo " Filtered GTF file containing only sequences in the FASTA file" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/gunzip/.config.vsh.yaml b/target/executable/gunzip/.config.vsh.yaml index 125f575..3b37047 100644 --- a/target/executable/gunzip/.config.vsh.yaml +++ b/target/executable/gunzip/.config.vsh.yaml @@ -47,6 +47,9 @@ info: - "modules/nf-core/gunzip/meta.yml" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -148,15 +151,15 @@ build_info: engine: "docker|native" output: "target/executable/gunzip" executable: "target/executable/gunzip/gunzip" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -167,7 +170,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/gunzip/gunzip b/target/executable/gunzip/gunzip index 84ebcc3..2edc58a 100755 --- a/target/executable/gunzip/gunzip +++ b/target/executable/gunzip/gunzip @@ -2,7 +2,7 @@ # gunzip main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -169,22 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "gunzip main" - echo "" - echo "Compress or uncompress a file or list of files." - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " Path of file to be uncompressed" - echo "" - echo "Output:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " Decompressed file." -} # initialise variables VIASH_MODE='run' @@ -466,9 +450,9 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* LABEL org.opencontainers.image.description="Companion container for running component gunzip" -LABEL org.opencontainers.image.created="2024-12-05T14:40:07Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:32Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -583,6 +567,48 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "gunzip main" + echo "" + echo "Compress or uncompress a file or list of files." + echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " Path of file to be uncompressed" + echo "" + echo "Output:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " Decompressed file." 
+ echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/multiqc_custom_biotype/.config.vsh.yaml b/target/executable/multiqc_custom_biotype/.config.vsh.yaml index 35d6d29..1f6e64a 100644 --- a/target/executable/multiqc_custom_biotype/.config.vsh.yaml +++ b/target/executable/multiqc_custom_biotype/.config.vsh.yaml @@ -73,6 +73,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -169,15 +172,15 @@ build_info: engine: "docker|native" output: "target/executable/multiqc_custom_biotype" executable: "target/executable/multiqc_custom_biotype/multiqc_custom_biotype" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -188,7 +191,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/multiqc_custom_biotype/multiqc_custom_biotype b/target/executable/multiqc_custom_biotype/multiqc_custom_biotype index 4b7d5f5..35004b0 100755 --- a/target/executable/multiqc_custom_biotype/multiqc_custom_biotype +++ b/target/executable/multiqc_custom_biotype/multiqc_custom_biotype @@ -2,7 +2,7 @@ # multiqc_custom_biotype main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,36 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "multiqc_custom_biotype main" - echo "" - echo "Calculate features percentage for biotype counts" - echo "" - echo "Input:" - echo " --biocounts" - echo " type: file, file must exist" - echo " File with all biocounts" - echo "" - echo " --id" - echo " type: string" - echo " default: \$id" - echo " Sample name" - echo "" - echo " --features" - echo " type: string" - echo " default: rRNA" - echo " Features to count" - echo "" - echo "Output:" - echo " --featurecounts_multiqc" - echo " type: file, output, file must exist" - echo " default: \$id.biotype_counts_mqc.tsv" - echo "" - echo " --featurecounts_rrna_multiqc" - echo " type: file, output, file must exist" - echo " default: \$id.biotype_counts_rrna_mqc.tsv" -} # initialise variables VIASH_MODE='run' @@ -476,9 +446,9 @@ function ViashDockerfile { FROM python:latest ENTRYPOINT [] LABEL org.opencontainers.image.description="Companion container for running component multiqc_custom_biotype" -LABEL org.opencontainers.image.created="2024-12-05T14:40:10Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:33Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -593,6 +563,62 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo 
"multiqc_custom_biotype main" + echo "" + echo "Calculate features percentage for biotype counts" + echo "" + echo "Input:" + echo " --biocounts" + echo " type: file, file must exist" + echo " File with all biocounts" + echo "" + echo " --id" + echo " type: string" + echo " default: \$id" + echo " Sample name" + echo "" + echo " --features" + echo " type: string" + echo " default: rRNA" + echo " Features to count" + echo "" + echo "Output:" + echo " --featurecounts_multiqc" + echo " type: file, output, file must exist" + echo " default: \$id.biotype_counts_mqc.tsv" + echo "" + echo " --featurecounts_rrna_multiqc" + echo " type: file, output, file must exist" + echo " default: \$id.biotype_counts_rrna_mqc.tsv" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/picard_markduplicates/.config.vsh.yaml b/target/executable/picard_markduplicates/.config.vsh.yaml index 5632556..a2a1f01 100644 --- a/target/executable/picard_markduplicates/.config.vsh.yaml +++ b/target/executable/picard_markduplicates/.config.vsh.yaml @@ -107,6 +107,9 @@ info: - "modules/nf-core/picard/markduplicates/meta.yml" last_sha: "55398de6ab7577acfe9b1180016a93d7af7eb859" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -211,15 +214,15 @@ build_info: engine: "docker|native" output: "target/executable/picard_markduplicates" executable: "target/executable/picard_markduplicates/picard_markduplicates" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -230,7 +233,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/picard_markduplicates/picard_markduplicates b/target/executable/picard_markduplicates/picard_markduplicates index ad4fef5..b8dbb8a 100755 --- a/target/executable/picard_markduplicates/picard_markduplicates +++ b/target/executable/picard_markduplicates/picard_markduplicates @@ -2,7 +2,7 @@ # picard_markduplicates main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work 
thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,48 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "picard_markduplicates main" - echo "" - echo "Locate and tag duplicate reads in a BAM file" - echo "" - echo "Input:" - echo " --bam" - echo " type: file, file must exist" - echo " Input BAM file" - echo "" - echo " --fasta" - echo " type: file, file must exist" - echo " Reference genome FASTA file" - echo "" - echo " --fai" - echo " type: file, file must exist" - echo " Reference genome FASTA index" - echo "" - echo " --extra_picard_args" - echo " type: string" - echo " default: --ASSUME_SORTED true --REMOVE_DUPLICATES false" - echo "--VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" - echo " Additional argument to be passed to Picard MarkDuplicates" - echo "" - echo "Output:" - echo " --output_bam" - echo " type: file, output, file must exist" - echo " default: \$id.MarkDuplicates.bam" - echo " BAM file with duplicate reads marked/removed" - echo "" - echo " --bai" - echo " type: file, output" - echo " default: \$id.MarkDuplicates.bam.bai" - echo " An optional BAM index file. 
If desired, --CREATE_INDEX must be passed as" - echo " a flag" - echo "" - echo " --metrics" - echo " type: file, output, file must exist" - echo " default: \$id.MarkDuplicates.metrics.txt" - echo " Duplicate metrics file generated by picard" -} # initialise variables VIASH_MODE='run' @@ -494,9 +452,9 @@ wget --no-check-certificate https://github.com/broadinstitute/picard/releases/do mv picard.jar /usr/local/bin LABEL org.opencontainers.image.description="Companion container for running component picard_markduplicates" -LABEL org.opencontainers.image.created="2024-12-05T14:40:08Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:30Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -611,6 +569,74 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "picard_markduplicates main" + echo "" + echo "Locate and tag duplicate reads in a BAM file" + echo "" + echo "Input:" + echo " --bam" + echo " type: file, file must exist" + echo " Input BAM file" + echo "" + echo " --fasta" + echo " type: file, file must exist" + echo " Reference genome FASTA file" + echo "" + echo " --fai" + echo " type: file, file must exist" + echo " Reference genome FASTA index" + echo "" + echo " --extra_picard_args" + echo " type: string" + echo " default: --ASSUME_SORTED true --REMOVE_DUPLICATES false" + echo "--VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" + echo " Additional argument to be passed to Picard MarkDuplicates" + echo "" + echo "Output:" + echo " --output_bam" + echo " type: file, 
output, file must exist" + echo " default: \$id.MarkDuplicates.bam" + echo " BAM file with duplicate reads marked/removed" + echo "" + echo " --bai" + echo " type: file, output" + echo " default: \$id.MarkDuplicates.bam.bai" + echo " An optional BAM index file. If desired, --CREATE_INDEX must be passed as" + echo " a flag" + echo "" + echo " --metrics" + echo " type: file, output, file must exist" + echo " default: \$id.MarkDuplicates.metrics.txt" + echo " Duplicate metrics file generated by picard" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/prepare_multiqc_input/.config.vsh.yaml b/target/executable/prepare_multiqc_input/.config.vsh.yaml index f8af77d..2788bf1 100644 --- a/target/executable/prepare_multiqc_input/.config.vsh.yaml +++ b/target/executable/prepare_multiqc_input/.config.vsh.yaml @@ -320,6 +320,9 @@ resources: description: "Prepare directory with all the input files for MultiQC.\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -416,15 +419,15 @@ build_info: engine: "docker|native" output: "target/executable/prepare_multiqc_input" executable: "target/executable/prepare_multiqc_input/prepare_multiqc_input" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -435,7 +438,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/prepare_multiqc_input/prepare_multiqc_input b/target/executable/prepare_multiqc_input/prepare_multiqc_input index 750e0d9..d493075 100755 --- a/target/executable/prepare_multiqc_input/prepare_multiqc_input +++ b/target/executable/prepare_multiqc_input/prepare_multiqc_input @@ -2,7 +2,7 @@ # prepare_multiqc_input main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,118 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "prepare_multiqc_input main" - echo "" - echo "Prepare directory with all the input files for MultiQC." - echo "" - echo "Input:" - echo " --fail_trimming_multiqc" - echo " type: string" - echo "" - echo " --fail_mapping_multiqc" - echo " type: string" - echo "" - echo " --fail_strand_multiqc" - echo " type: string" - echo "" - echo " --fastqc_raw_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --fastqc_trim_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --trim_log_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --sortmerna_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --star_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --salmon_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --samtools_stats" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --samtools_flagstat" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --samtools_idxstats" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --markduplicates_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --pseudo_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --featurecounts_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --featurecounts_rrna_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --aligner_pca_multiqc" - echo " 
type: file, file must exist" - echo "" - echo " --aligner_clustering_multiqc" - echo " type: file, file must exist" - echo "" - echo " --pseudo_aligner_pca_multiqc" - echo " type: file, file must exist" - echo "" - echo " --pseudo_aligner_clustering_multiqc" - echo " type: file, file must exist" - echo "" - echo " --preseq_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --qualimap_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --dupradar_output_dup_intercept_mqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --dupradar_output_duprate_exp_denscurve_mqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --bamstat_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --inferexperiment_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --innerdistance_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --junctionannotation_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --junctionsaturation_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --readdistribution_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --readduplication_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --tin_multiqc" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --multiqc_config" - echo " type: file, file must exist" - echo " Custom multiqc configuration file" - echo "" - echo "Ouput:" - echo " --output" - echo " type: file, output, file must exist" - echo " default: multiqc_input" -} # initialise variables VIASH_MODE='run' @@ -558,9 +446,9 @@ function ViashDockerfile { FROM ubuntu:22.04 ENTRYPOINT [] LABEL 
org.opencontainers.image.description="Companion container for running component prepare_multiqc_input" -LABEL org.opencontainers.image.created="2024-12-05T14:40:06Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:32Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -675,6 +563,144 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "prepare_multiqc_input main" + echo "" + echo "Prepare directory with all the input files for MultiQC." + echo "" + echo "Input:" + echo " --fail_trimming_multiqc" + echo " type: string" + echo "" + echo " --fail_mapping_multiqc" + echo " type: string" + echo "" + echo " --fail_strand_multiqc" + echo " type: string" + echo "" + echo " --fastqc_raw_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --fastqc_trim_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --trim_log_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --sortmerna_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --star_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --salmon_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --samtools_stats" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --samtools_flagstat" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " 
--samtools_idxstats" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --markduplicates_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --pseudo_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --featurecounts_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --featurecounts_rrna_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --aligner_pca_multiqc" + echo " type: file, file must exist" + echo "" + echo " --aligner_clustering_multiqc" + echo " type: file, file must exist" + echo "" + echo " --pseudo_aligner_pca_multiqc" + echo " type: file, file must exist" + echo "" + echo " --pseudo_aligner_clustering_multiqc" + echo " type: file, file must exist" + echo "" + echo " --preseq_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --qualimap_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --dupradar_output_dup_intercept_mqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --dupradar_output_duprate_exp_denscurve_mqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --bamstat_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --inferexperiment_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --innerdistance_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --junctionannotation_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --junctionsaturation_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --readdistribution_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " 
--readduplication_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --tin_multiqc" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --multiqc_config" + echo " type: file, file must exist" + echo " Custom multiqc configuration file" + echo "" + echo "Ouput:" + echo " --output" + echo " type: file, output, file must exist" + echo " default: multiqc_input" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/preprocess_transcripts_fasta/.config.vsh.yaml b/target/executable/preprocess_transcripts_fasta/.config.vsh.yaml index a56f702..90ba93a 100644 --- a/target/executable/preprocess_transcripts_fasta/.config.vsh.yaml +++ b/target/executable/preprocess_transcripts_fasta/.config.vsh.yaml @@ -46,6 +46,9 @@ info: - "modules/local/preprocess_transcripts_fasta_gencode.nf" last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -142,15 +145,15 @@ build_info: engine: "docker|native" output: "target/executable/preprocess_transcripts_fasta" executable: "target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -161,7 +164,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta b/target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta index ba18b17..ca37a01 100755 --- a/target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta +++ b/target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta @@ -2,7 +2,7 @@ # preprocess_transcripts_fasta main # -# This wrapper script is auto-generated by viash 
0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,22 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "preprocess_transcripts_fasta main" - echo "" - echo "Process transcripts FASTA if GTF file is GENOCODE format" - echo "" - echo "Input:" - echo " --transcript_fasta" - echo " type: file, required parameter, file must exist" - echo " Path of transcripts FASTA file" - echo "" - echo "Output:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " Path of processed output FASTA file." -} # initialise variables VIASH_MODE='run' @@ -462,9 +446,9 @@ function ViashDockerfile { FROM ubuntu:22.04 ENTRYPOINT [] LABEL org.opencontainers.image.description="Companion container for running component preprocess_transcripts_fasta" -LABEL org.opencontainers.image.created="2024-12-05T14:40:08Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:32Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -579,6 +563,48 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "preprocess_transcripts_fasta main" + echo "" + echo "Process transcripts FASTA if GTF file is GENOCODE format" + echo "" + echo "Input:" + echo " --transcript_fasta" + echo " type: 
file, required parameter, file must exist" + echo " Path of transcripts FASTA file" + echo "" + echo "Output:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " Path of processed output FASTA file." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/preseq_lcextrap/.config.vsh.yaml b/target/executable/preseq_lcextrap/.config.vsh.yaml index 079af22..7c62905 100644 --- a/target/executable/preseq_lcextrap/.config.vsh.yaml +++ b/target/executable/preseq_lcextrap/.config.vsh.yaml @@ -67,6 +67,9 @@ info: - "modules/nf-core/preseq/lcextrap/meta.yml" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -195,15 +198,15 @@ build_info: engine: "docker|native" output: "target/executable/preseq_lcextrap" executable: "target/executable/preseq_lcextrap/preseq_lcextrap" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -214,7 +217,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/preseq_lcextrap/preseq_lcextrap b/target/executable/preseq_lcextrap/preseq_lcextrap index a294a76..f26f32e 100755 --- a/target/executable/preseq_lcextrap/preseq_lcextrap +++ b/target/executable/preseq_lcextrap/preseq_lcextrap @@ -2,7 +2,7 @@ # preseq_lcextrap main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -169,30 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "preseq_lcextrap main" - echo "" - echo "Computing the expected future yield of distinct reads and bounds on the number" - echo "of total distinct reads in the library and the associated confidence intervals." - echo "" - echo "Input:" - echo " --input" - echo " type: file, file must exist" - echo " Input genome BAM/BED file" - echo "" - echo " --extra_preseq_args" - echo " type: string" - echo "" - echo " --paired" - echo " type: boolean" - echo " Paired-end reads?" - echo "" - echo "Output:" - echo " --output" - echo " type: file, output, file must exist" - echo " default: \$id.lc_extrap.txt" -} # initialise variables VIASH_MODE='run' @@ -495,9 +471,9 @@ mkdir build && cd build && \ make && make install && make HAVE_HTSLIB=1 all LABEL org.opencontainers.image.description="Companion container for running component preseq_lcextrap" -LABEL org.opencontainers.image.created="2024-12-05T14:40:07Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:32Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -612,6 +588,56 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "preseq_lcextrap main" + echo "" + echo "Computing the expected future yield of distinct reads and bounds on the number" + echo "of total distinct reads in the library and the associated confidence 
intervals." + echo "" + echo "Input:" + echo " --input" + echo " type: file, file must exist" + echo " Input genome BAM/BED file" + echo "" + echo " --extra_preseq_args" + echo " type: string" + echo "" + echo " --paired" + echo " type: boolean" + echo " Paired-end reads?" + echo "" + echo "Output:" + echo " --output" + echo " type: file, output, file must exist" + echo " default: \$id.lc_extrap.txt" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/rsem_merge_counts/.config.vsh.yaml b/target/executable/rsem_merge_counts/.config.vsh.yaml index 9d5d363..b124b28 100644 --- a/target/executable/rsem_merge_counts/.config.vsh.yaml +++ b/target/executable/rsem_merge_counts/.config.vsh.yaml @@ -89,6 +89,9 @@ info: - "modules/local/rsem_merge_counts/main.nf" last_sha: "311279532694ce7520164ce4d65a388c0cd11f60" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -185,15 +188,15 @@ build_info: engine: "docker|native" output: "target/executable/rsem_merge_counts" executable: "target/executable/rsem_merge_counts/rsem_merge_counts" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -204,7 +207,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/rsem_merge_counts/rsem_merge_counts b/target/executable/rsem_merge_counts/rsem_merge_counts index 5fba3a5..5a05aee 100755 --- a/target/executable/rsem_merge_counts/rsem_merge_counts +++ b/target/executable/rsem_merge_counts/rsem_merge_counts @@ -2,7 +2,7 @@ # rsem_merge_counts main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,43 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "rsem_merge_counts main" - echo "" - echo "Merge the transcript quantification results obtained from rsem" - echo "calculate-expression across all samples." - echo "" - echo "Input:" - echo " --counts_gene" - echo " type: file, file must exist" - echo " Expression counts on gene level (genes)" - echo "" - echo " --counts_transcripts" - echo " type: file, file must exist" - echo " Expression counts on transcript level (isoforms)" - echo "" - echo "Output:" - echo " --merged_gene_counts" - echo " type: file, output, file must exist" - echo " default: rsem.merged.gene_counts.tsv" - echo " File containing gene counts across all samples." - echo "" - echo " --merged_gene_tpm" - echo " type: file, output, file must exist" - echo " default: rsem.merged.gene_tpm.tsv" - echo " File containing gene TPM across all samples." - echo "" - echo " --merged_transcript_counts" - echo " type: file, output, file must exist" - echo " default: rsem.merged.transcript_counts.tsv" - echo " File containing transcript counts across all samples." - echo "" - echo " --merged_transcript_tpm" - echo " type: file, output, file must exist" - echo " default: rsem.merged.transcript_tpm.tsv" - echo " File containing transcript TPM across all samples." 
-} # initialise variables VIASH_MODE='run' @@ -483,9 +446,9 @@ function ViashDockerfile { FROM ubuntu:22.04 ENTRYPOINT [] LABEL org.opencontainers.image.description="Companion container for running component rsem_merge_counts" -LABEL org.opencontainers.image.created="2024-12-05T14:40:09Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:32Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -600,6 +563,69 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rsem_merge_counts main" + echo "" + echo "Merge the transcript quantification results obtained from rsem" + echo "calculate-expression across all samples." + echo "" + echo "Input:" + echo " --counts_gene" + echo " type: file, file must exist" + echo " Expression counts on gene level (genes)" + echo "" + echo " --counts_transcripts" + echo " type: file, file must exist" + echo " Expression counts on transcript level (isoforms)" + echo "" + echo "Output:" + echo " --merged_gene_counts" + echo " type: file, output, file must exist" + echo " default: rsem.merged.gene_counts.tsv" + echo " File containing gene counts across all samples." + echo "" + echo " --merged_gene_tpm" + echo " type: file, output, file must exist" + echo " default: rsem.merged.gene_tpm.tsv" + echo " File containing gene TPM across all samples." 
+ echo "" + echo " --merged_transcript_counts" + echo " type: file, output, file must exist" + echo " default: rsem.merged.transcript_counts.tsv" + echo " File containing transcript counts across all samples." + echo "" + echo " --merged_transcript_tpm" + echo " type: file, output, file must exist" + echo " default: rsem.merged.transcript_tpm.tsv" + echo " File containing transcript TPM across all samples." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/rseqc/rseqc_junctionannotation/.config.vsh.yaml b/target/executable/rseqc/rseqc_junctionannotation/.config.vsh.yaml index 87395d4..e348789 100644 --- a/target/executable/rseqc/rseqc_junctionannotation/.config.vsh.yaml +++ b/target/executable/rseqc/rseqc_junctionannotation/.config.vsh.yaml @@ -157,6 +157,9 @@ info: paths: - "modules/nf-core/rseqc/junctionannotation/main.nf" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -264,15 +267,15 @@ build_info: engine: "docker|native" output: "target/executable/rseqc/rseqc_junctionannotation" executable: "target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -283,7 +286,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation b/target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation index 7ecaa09..3c43155 100755 --- a/target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation +++ b/target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation @@ -2,7 +2,7 @@ # rseqc_junctionannotation main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script 
is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,72 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "rseqc_junctionannotation main" - echo "" - echo "Compare detected splice junctions to reference gene model." - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " input alignment file in BAM or SAM format" - echo "" - echo " --refgene" - echo " type: file, required parameter, file must exist" - echo " Reference gene model in bed format" - echo "" - echo " --map_qual" - echo " type: integer" - echo " default: 30" - echo " min: 0" - echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" - echo " reads, default=30." - echo "" - echo " --min_intron" - echo " type: integer" - echo " default: 50" - echo " min: 1" - echo " Minimum intron length (bp), default = 50." - echo "" - echo "Output:" - echo " --output_log" - echo " type: file, output, file must exist" - echo " default: \$id.junction_annotation.log" - echo " output log of junction annotation script" - echo "" - echo " --output_plot_r" - echo " type: file, output, file must exist" - echo " default: \$id.junction_annotation_plot.r" - echo " r script to generate splice_junction and splice_events plot" - echo "" - echo " --output_junction_bed" - echo " type: file, output, file must exist" - echo " default: \$id.junction_annotation.bed" - echo " junction annotation file (bed format)" - echo "" - echo " --output_junction_interact" - echo " type: file, output, file must exist" - echo " default: \$id.junction_annotation.Interact.bed" - echo " interact file (bed format) of junctions. 
Can be uploaded to UCSC genome" - echo " browser or converted to bigInteract (using bedToBigBed program) for" - echo " visualization." - echo "" - echo " --output_junction_sheet" - echo " type: file, output, file must exist" - echo " default: \$id.junction_annotation.xls" - echo " junction annotation file (xls format)" - echo "" - echo " --output_splice_events_plot" - echo " type: file, output, file must exist" - echo " default: \$id.splice_events.pdf" - echo " plot of splice events (pdf)" - echo "" - echo " --output_splice_junctions_plot" - echo " type: file, output, file must exist" - echo " default: \$id.splice_junctions_plot.pdf" - echo " plot of junctions (pdf)" -} # initialise variables VIASH_MODE='run' @@ -519,9 +453,9 @@ RUN pip install --upgrade pip && \ pip install --upgrade --no-cache-dir "RSeQC" LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_junctionannotation" -LABEL org.opencontainers.image.created="2024-12-05T14:40:09Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:31Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -636,6 +570,98 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rseqc_junctionannotation main" + echo "" + echo "Compare detected splice junctions to reference gene model." 
+ echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " input alignment file in BAM or SAM format" + echo "" + echo " --refgene" + echo " type: file, required parameter, file must exist" + echo " Reference gene model in bed format" + echo "" + echo " --map_qual" + echo " type: integer" + echo " default: 30" + echo " min: 0" + echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" + echo " reads, default=30." + echo "" + echo " --min_intron" + echo " type: integer" + echo " default: 50" + echo " min: 1" + echo " Minimum intron length (bp), default = 50." + echo "" + echo "Output:" + echo " --output_log" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation.log" + echo " output log of junction annotation script" + echo "" + echo " --output_plot_r" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation_plot.r" + echo " r script to generate splice_junction and splice_events plot" + echo "" + echo " --output_junction_bed" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation.bed" + echo " junction annotation file (bed format)" + echo "" + echo " --output_junction_interact" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation.Interact.bed" + echo " interact file (bed format) of junctions. Can be uploaded to UCSC genome" + echo " browser or converted to bigInteract (using bedToBigBed program) for" + echo " visualization." 
+ echo "" + echo " --output_junction_sheet" + echo " type: file, output, file must exist" + echo " default: \$id.junction_annotation.xls" + echo " junction annotation file (xls format)" + echo "" + echo " --output_splice_events_plot" + echo " type: file, output, file must exist" + echo " default: \$id.splice_events.pdf" + echo " plot of splice events (pdf)" + echo "" + echo " --output_splice_junctions_plot" + echo " type: file, output, file must exist" + echo " default: \$id.splice_junctions_plot.pdf" + echo " plot of junctions (pdf)" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/rseqc/rseqc_junctionsaturation/.config.vsh.yaml b/target/executable/rseqc/rseqc_junctionsaturation/.config.vsh.yaml index e164814..4704855 100644 --- a/target/executable/rseqc/rseqc_junctionsaturation/.config.vsh.yaml +++ b/target/executable/rseqc/rseqc_junctionsaturation/.config.vsh.yaml @@ -146,6 +146,9 @@ info: paths: - "modules/nf-core/rseqc/junctionsaturation/main.nf" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -253,15 +256,15 @@ build_info: engine: "docker|native" output: "target/executable/rseqc/rseqc_junctionsaturation" executable: "target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -272,7 +275,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation b/target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation index c174b4e..c35bc37 100755 --- a/target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation +++ b/target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation @@ -2,7 +2,7 @@ # rseqc_junctionsaturation main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script 
is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,75 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "rseqc_junctionsaturation main" - echo "" - echo "Compare detected splice junctions to reference gene model." - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " input alignment file in BAM or SAM format" - echo "" - echo " --refgene" - echo " type: file, required parameter, file must exist" - echo " Reference gene model in bed format" - echo "" - echo " --sampling_percentile_lower_bound" - echo " type: integer" - echo " default: 5" - echo " min: 0" - echo " max: 100" - echo " Sampling starts from this percentile, must be an integer between 0 and" - echo " 100, default =5." - echo "" - echo " --sampling_percentile_upper_bound" - echo " type: integer" - echo " default: 100" - echo " min: 0" - echo " max: 100" - echo " Sampling ends at this percentile, must be an integer between 0 and 100," - echo " default =5." - echo "" - echo " --sampling_percentile_step" - echo " type: integer" - echo " default: 5" - echo " min: 0" - echo " max: 100" - echo " Sampling frequency in %. Smaller value means more sampling times. Must" - echo " be an integer between 0 and 100, default = 5." - echo "" - echo " --min_intron" - echo " type: integer" - echo " default: 50" - echo " min: 1" - echo " Minimum intron length (bp), default = 50." - echo "" - echo " --min_splice_read" - echo " type: integer" - echo " default: 1" - echo " min: 1" - echo " Minimum number of supporting reads to call a junction, default = 1." 
- echo "" - echo " --map_qual" - echo " type: integer" - echo " default: 30" - echo " min: 0" - echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" - echo " reads, default=30." - echo "" - echo "Output:" - echo " --output_plot_r" - echo " type: file, output, file must exist" - echo " default: \$id.junction_saturation_plot.r" - echo " r script to generate junction_saturation_plot plot" - echo "" - echo " --output_plot" - echo " type: file, output, file must exist" - echo " default: \$id.junction_saturation_plot.pdf" - echo " plot of junction saturation (pdf)" -} # initialise variables VIASH_MODE='run' @@ -522,9 +453,9 @@ RUN pip install --upgrade pip && \ pip install --upgrade --no-cache-dir "RSeQC" LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_junctionsaturation" -LABEL org.opencontainers.image.created="2024-12-05T14:40:10Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:31Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -639,6 +570,101 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rseqc_junctionsaturation main" + echo "" + echo "Compare detected splice junctions to reference gene model." 
+ echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " input alignment file in BAM or SAM format" + echo "" + echo " --refgene" + echo " type: file, required parameter, file must exist" + echo " Reference gene model in bed format" + echo "" + echo " --sampling_percentile_lower_bound" + echo " type: integer" + echo " default: 5" + echo " min: 0" + echo " max: 100" + echo " Sampling starts from this percentile, must be an integer between 0 and" + echo " 100, default =5." + echo "" + echo " --sampling_percentile_upper_bound" + echo " type: integer" + echo " default: 100" + echo " min: 0" + echo " max: 100" + echo " Sampling ends at this percentile, must be an integer between 0 and 100," + echo " default =5." + echo "" + echo " --sampling_percentile_step" + echo " type: integer" + echo " default: 5" + echo " min: 0" + echo " max: 100" + echo " Sampling frequency in %. Smaller value means more sampling times. Must" + echo " be an integer between 0 and 100, default = 5." + echo "" + echo " --min_intron" + echo " type: integer" + echo " default: 50" + echo " min: 1" + echo " Minimum intron length (bp), default = 50." + echo "" + echo " --min_splice_read" + echo " type: integer" + echo " default: 1" + echo " min: 1" + echo " Minimum number of supporting reads to call a junction, default = 1." + echo "" + echo " --map_qual" + echo " type: integer" + echo " default: 30" + echo " min: 0" + echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" + echo " reads, default=30." 
+ echo "" + echo "Output:" + echo " --output_plot_r" + echo " type: file, output, file must exist" + echo " default: \$id.junction_saturation_plot.r" + echo " r script to generate junction_saturation_plot plot" + echo "" + echo " --output_plot" + echo " type: file, output, file must exist" + echo " default: \$id.junction_saturation_plot.pdf" + echo " plot of junction saturation (pdf)" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/rseqc/rseqc_readdistribution/.config.vsh.yaml b/target/executable/rseqc/rseqc_readdistribution/.config.vsh.yaml index 36d8c40..a702dd6 100644 --- a/target/executable/rseqc/rseqc_readdistribution/.config.vsh.yaml +++ b/target/executable/rseqc/rseqc_readdistribution/.config.vsh.yaml @@ -60,6 +60,9 @@ info: paths: - "modules/nf-core/rseqc/readdistribution/main.nf" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -166,15 +169,15 @@ build_info: engine: "docker|native" output: "target/executable/rseqc/rseqc_readdistribution" executable: "target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -185,7 +188,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution b/target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution index 765fdb2..4633d8a 100755 --- a/target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution +++ b/target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution @@ -2,7 +2,7 @@ # rseqc_readdistribution main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and 
is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,27 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "rseqc_readdistribution main" - echo "" - echo "Calculate how mapped reads are distributed over genomic features." - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " input alignment file in BAM or SAM format" - echo "" - echo " --refgene" - echo " type: file, required parameter, file must exist" - echo " Reference gene model in bed format" - echo "" - echo "Output:" - echo " --output" - echo " type: file, output, file must exist" - echo " default: \$id.read_distribution.txt" - echo " output file (txt) of read distribution analysis." -} # initialise variables VIASH_MODE='run' @@ -474,9 +453,9 @@ RUN pip install --upgrade pip && \ pip install --upgrade --no-cache-dir "RSeQC" LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_readdistribution" -LABEL org.opencontainers.image.created="2024-12-05T14:40:09Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:31Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -591,6 +570,53 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rseqc_readdistribution main" + echo "" + echo "Calculate how mapped reads 
are distributed over genomic features." + echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " input alignment file in BAM or SAM format" + echo "" + echo " --refgene" + echo " type: file, required parameter, file must exist" + echo " Reference gene model in bed format" + echo "" + echo "Output:" + echo " --output" + echo " type: file, output, file must exist" + echo " default: \$id.read_distribution.txt" + echo " output file (txt) of read distribution analysis." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/rseqc/rseqc_readduplication/.config.vsh.yaml b/target/executable/rseqc/rseqc_readduplication/.config.vsh.yaml index 7a42440..2e60421 100644 --- a/target/executable/rseqc/rseqc_readduplication/.config.vsh.yaml +++ b/target/executable/rseqc/rseqc_readduplication/.config.vsh.yaml @@ -108,6 +108,9 @@ info: paths: - "modules/nf-core/rseqc/readduplication/main.nf" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -215,15 +218,15 @@ build_info: engine: "docker|native" output: "target/executable/rseqc/rseqc_readduplication" executable: "target/executable/rseqc/rseqc_readduplication/rseqc_readduplication" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -234,7 +237,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/rseqc/rseqc_readduplication/rseqc_readduplication b/target/executable/rseqc/rseqc_readduplication/rseqc_readduplication index 90e4898..a1d95e7 100755 --- a/target/executable/rseqc/rseqc_readduplication/rseqc_readduplication +++ b/target/executable/rseqc/rseqc_readduplication/rseqc_readduplication @@ -2,7 +2,7 @@ # rseqc_readduplication main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a 
derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,52 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "rseqc_readduplication main" - echo "" - echo "Calculate read duplication rate." - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " input alignment file in BAM or SAM format" - echo "" - echo " --read_count_upper_limit" - echo " type: integer" - echo " default: 500" - echo " min: 1" - echo " Upper limit of reads' occurence. Only used for plotting, default = 500" - echo " (times)." - echo "" - echo " --map_qual" - echo " type: integer" - echo " default: 30" - echo " min: 0" - echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" - echo " reads, default=30." - echo "" - echo "Output:" - echo " --output_duplication_rate_plot_r" - echo " type: file, output, file must exist" - echo " default: \$id.duplication_rate_plot.r" - echo " R script for generating duplication rate plot" - echo "" - echo " --output_duplication_rate_plot" - echo " type: file, output, file must exist" - echo " default: \$id.duplication_rate_plot.pdf" - echo " duplication rate plot (pdf)" - echo "" - echo " --output_duplication_rate_mapping" - echo " type: file, output, file must exist" - echo " default: \$id.duplication_rate_mapping.xls" - echo " Summary of mapping-based read duplication" - echo "" - echo " --output_duplication_rate_sequence" - echo " type: file, output, file must exist" - echo " default: \$id.duplication_rate_sequencing.xls" - echo " Summary of sequencing-based read duplication" -} # initialise variables VIASH_MODE='run' @@ -499,9 +453,9 @@ RUN pip install --upgrade pip && \ pip install --upgrade --no-cache-dir "RSeQC" LABEL org.opencontainers.image.description="Companion container for running 
component rseqc rseqc_readduplication" -LABEL org.opencontainers.image.created="2024-12-05T14:40:08Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:31Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -616,6 +570,78 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rseqc_readduplication main" + echo "" + echo "Calculate read duplication rate." + echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " input alignment file in BAM or SAM format" + echo "" + echo " --read_count_upper_limit" + echo " type: integer" + echo " default: 500" + echo " min: 1" + echo " Upper limit of reads' occurence. Only used for plotting, default = 500" + echo " (times)." + echo "" + echo " --map_qual" + echo " type: integer" + echo " default: 30" + echo " min: 0" + echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" + echo " reads, default=30." 
+ echo "" + echo "Output:" + echo " --output_duplication_rate_plot_r" + echo " type: file, output, file must exist" + echo " default: \$id.duplication_rate_plot.r" + echo " R script for generating duplication rate plot" + echo "" + echo " --output_duplication_rate_plot" + echo " type: file, output, file must exist" + echo " default: \$id.duplication_rate_plot.pdf" + echo " duplication rate plot (pdf)" + echo "" + echo " --output_duplication_rate_mapping" + echo " type: file, output, file must exist" + echo " default: \$id.duplication_rate_mapping.xls" + echo " Summary of mapping-based read duplication" + echo "" + echo " --output_duplication_rate_sequence" + echo " type: file, output, file must exist" + echo " default: \$id.duplication_rate_sequencing.xls" + echo " Summary of sequencing-based read duplication" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/rseqc/rseqc_tin/.config.vsh.yaml b/target/executable/rseqc/rseqc_tin/.config.vsh.yaml index f5fb845..8101ee7 100644 --- a/target/executable/rseqc/rseqc_tin/.config.vsh.yaml +++ b/target/executable/rseqc/rseqc_tin/.config.vsh.yaml @@ -114,6 +114,9 @@ info: paths: - "modules/nf-core/rseqc/tin/main.nf" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -218,15 +221,15 @@ build_info: engine: "docker|native" output: "target/executable/rseqc/rseqc_tin" executable: "target/executable/rseqc/rseqc_tin/rseqc_tin" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -237,7 +240,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/rseqc/rseqc_tin/rseqc_tin b/target/executable/rseqc/rseqc_tin/rseqc_tin index 714b54a..dc14d45 100755 --- a/target/executable/rseqc/rseqc_tin/rseqc_tin +++ b/target/executable/rseqc/rseqc_tin/rseqc_tin @@ -2,7 +2,7 @@ # rseqc_tin main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -169,55 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "rseqc_tin main" - echo "" - echo "Calculte TIN (transcript integrity number) from RNA-seq reads" - echo "" - echo "Input:" - echo " --bam_input" - echo " type: file, required parameter, file must exist" - echo " Path to input alignment file in BAM or SAM format." - echo "" - echo " --bai_input" - echo " type: file, required parameter, file must exist" - echo " Path to bam index file in bai format." - echo "" - echo " --refgene" - echo " type: file, required parameter, file must exist" - echo " BED file containing the reference gene model" - echo "" - echo " --minimum_coverage" - echo " type: integer" - echo " default: 10" - echo " min: 1" - echo " Minimum number of reads mapped to a transcript, default = 10." - echo "" - echo " --sample_size" - echo " type: integer" - echo " default: 100" - echo " min: 1" - echo " Number of equal-spaced nucleotide positions picked from mRNA. Note, if" - echo " this number is larger than the length of mRNA (L), it will be halved" - echo " until it's smaller than L (default = 100)" - echo "" - echo " --subtract_background" - echo " type: boolean_true" - echo " Set flag to subtract background noise (estimated from intronic reads)." - echo " Only use this option if there are substantial intronic reads." 
- echo "" - echo "Output:" - echo " --output_tin_summary" - echo " type: file, output, file must exist" - echo " default: \$id.tin_summary.txt" - echo " summary statistics (txt) of calculated TIN metrics" - echo "" - echo " --output_tin" - echo " type: file, output, file must exist" - echo " default: \$id.tin.xls" - echo " file with TIN metrics (xls)" -} # initialise variables VIASH_MODE='run' @@ -501,9 +452,9 @@ RUN apt-get update && \ RUN pip3 install RSeQC LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_tin" -LABEL org.opencontainers.image.created="2024-12-05T14:40:09Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:32Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -618,6 +569,81 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "rseqc_tin main" + echo "" + echo "Calculte TIN (transcript integrity number) from RNA-seq reads" + echo "" + echo "Input:" + echo " --bam_input" + echo " type: file, required parameter, file must exist" + echo " Path to input alignment file in BAM or SAM format." + echo "" + echo " --bai_input" + echo " type: file, required parameter, file must exist" + echo " Path to bam index file in bai format." 
+ echo "" + echo " --refgene" + echo " type: file, required parameter, file must exist" + echo " BED file containing the reference gene model" + echo "" + echo " --minimum_coverage" + echo " type: integer" + echo " default: 10" + echo " min: 1" + echo " Minimum number of reads mapped to a transcript, default = 10." + echo "" + echo " --sample_size" + echo " type: integer" + echo " default: 100" + echo " min: 1" + echo " Number of equal-spaced nucleotide positions picked from mRNA. Note, if" + echo " this number is larger than the length of mRNA (L), it will be halved" + echo " until it's smaller than L (default = 100)" + echo "" + echo " --subtract_background" + echo " type: boolean_true" + echo " Set flag to subtract background noise (estimated from intronic reads)." + echo " Only use this option if there are substantial intronic reads." + echo "" + echo "Output:" + echo " --output_tin_summary" + echo " type: file, output, file must exist" + echo " default: \$id.tin_summary.txt" + echo " summary statistics (txt) of calculated TIN metrics" + echo "" + echo " --output_tin" + echo " type: file, output, file must exist" + echo " default: \$id.tin.xls" + echo " file with TIN metrics (xls)" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. 
See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/sortmerna/.config.vsh.yaml b/target/executable/sortmerna/.config.vsh.yaml index ed97ba4..6c854b0 100644 --- a/target/executable/sortmerna/.config.vsh.yaml +++ b/target/executable/sortmerna/.config.vsh.yaml @@ -100,6 +100,9 @@ info: - "modules/nf-core/sortmerna/meta.yml" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -196,15 +199,15 @@ build_info: engine: "docker|native" output: "target/executable/sortmerna" executable: "target/executable/sortmerna/sortmerna" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -215,7 +218,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/sortmerna/sortmerna b/target/executable/sortmerna/sortmerna index 9fe11d7..e4d6c24 100755 --- a/target/executable/sortmerna/sortmerna +++ b/target/executable/sortmerna/sortmerna @@ -2,7 +2,7 @@ # sortmerna main # -# This wrapper 
script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,46 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "sortmerna main" - echo "" - echo "Local sequence alignment tool for filtering, mapping and clustering. The main" - echo "application of SortMeRNA is filtering rRNA from metatranscriptomic data." - echo "SortMeRNA takes as input files of reads (fasta, fastq, fasta.gz, fastq.gz) and" - echo "one or multiple rRNA database file(s), and sorts apart aligned and rejected" - echo "reads into two files." - echo "" - echo "Input:" - echo " --paired" - echo " type: boolean" - echo " Are the reads single-end or paired-end" - echo "" - echo " --input" - echo " type: file, multiple values allowed, file must exist" - echo " Input fastq" - echo "" - echo " --ribo_database_manifest" - echo " type: file, multiple values allowed, file must exist" - echo " Text file containing paths to fasta files (one per line) that will be" - echo " used to create the database for SortMeRNA." - echo "" - echo "Output:" - echo " --sortmerna_log" - echo " type: file, output" - echo " default: \$id.sortmerna.log" - echo " Sortmerna log file." - echo "" - echo " --fastq_1" - echo " type: file, required parameter, output, file must exist" - echo " default: \$id.\$key.read_1.fastq.gz" - echo " Output file for read 1." - echo "" - echo " --fastq_2" - echo " type: file, output" - echo " default: \$id.\$key.read_2.fastq.gz" - echo " Output file for read 2." 
-} # initialise variables VIASH_MODE='run' @@ -486,9 +446,9 @@ function ViashDockerfile { FROM quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0 ENTRYPOINT [] LABEL org.opencontainers.image.description="Companion container for running component sortmerna" -LABEL org.opencontainers.image.created="2024-12-05T14:40:13Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:33Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -603,6 +563,72 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "sortmerna main" + echo "" + echo "Local sequence alignment tool for filtering, mapping and clustering. The main" + echo "application of SortMeRNA is filtering rRNA from metatranscriptomic data." + echo "SortMeRNA takes as input files of reads (fasta, fastq, fasta.gz, fastq.gz) and" + echo "one or multiple rRNA database file(s), and sorts apart aligned and rejected" + echo "reads into two files." + echo "" + echo "Input:" + echo " --paired" + echo " type: boolean" + echo " Are the reads single-end or paired-end" + echo "" + echo " --input" + echo " type: file, multiple values allowed, file must exist" + echo " Input fastq" + echo "" + echo " --ribo_database_manifest" + echo " type: file, multiple values allowed, file must exist" + echo " Text file containing paths to fasta files (one per line) that will be" + echo " used to create the database for SortMeRNA." 
+ echo "" + echo "Output:" + echo " --sortmerna_log" + echo " type: file, output" + echo " default: \$id.sortmerna.log" + echo " Sortmerna log file." + echo "" + echo " --fastq_1" + echo " type: file, required parameter, output, file must exist" + echo " default: \$id.\$key.read_1.fastq.gz" + echo " Output file for read 1." + echo "" + echo " --fastq_2" + echo " type: file, output" + echo " default: \$id.\$key.read_2.fastq.gz" + echo " Output file for read 2." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/stringtie/.config.vsh.yaml b/target/executable/stringtie/.config.vsh.yaml index 18571dc..7da9e16 100644 --- a/target/executable/stringtie/.config.vsh.yaml +++ b/target/executable/stringtie/.config.vsh.yaml @@ -117,6 +117,9 @@ info: - "modules/nf-core/stringtie/stringtie/meta.yml" last_sha: "55398de6ab7577acfe9b1180016a93d7af7eb859" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -220,15 +223,15 @@ build_info: engine: "docker|native" output: "target/executable/stringtie" executable: "target/executable/stringtie/stringtie" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -239,7 +242,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/stringtie/stringtie b/target/executable/stringtie/stringtie index b32cb3d..ccdc6b6 100755 --- a/target/executable/stringtie/stringtie +++ b/target/executable/stringtie/stringtie @@ -2,7 +2,7 @@ # stringtie main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -169,50 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "stringtie main" - echo "" - echo "Transcript assembly and quantification for RNA-Seq" - echo "" - echo "Input:" - echo " --strandedness" - echo " type: string" - echo " Forward or reverse strand?" - echo "" - echo " --bam" - echo " type: file, file must exist" - echo "" - echo " --annotation_gtf" - echo " type: file, file must exist" - echo "" - echo " --extra_stringtie_args" - echo " type: string" - echo " Extra arguments for running StringTie" - echo "" - echo " --stringtie_ignore_gtf" - echo " type: boolean" - echo " Perform reference-guided de novo assembly of transcripts using" - echo " StringTie, i.e. don't restrict to those in GTF file." - echo "" - echo "Output:" - echo " --transcript_gtf" - echo " type: file, output, file must exist" - echo " default: \$id.\$key.transcripts.gtf" - echo "" - echo " --coverage_gtf" - echo " type: file, output, file must exist" - echo " default: \$id.\$key.coverage.gtf" - echo "" - echo " --abundance" - echo " type: file, output, file must exist" - echo " default: \$id.\$key.abundance.txt" - echo "" - echo " --ballgown" - echo " type: file, output, file must exist" - echo " default: \$id.\$key.ballgown" - echo " for running ballgown" -} # initialise variables VIASH_MODE='run' @@ -496,9 +452,9 @@ tar -xzf stringtie-2.2.1.Linux_x86_64.tar.gz && \ cp stringtie-2.2.1.Linux_x86_64/stringtie /usr/local/bin/ LABEL org.opencontainers.image.description="Companion container for running component stringtie" -LABEL org.opencontainers.image.created="2024-12-05T14:40:13Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL 
org.opencontainers.image.created="2025-05-06T19:18:34Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -613,6 +569,76 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "stringtie main" + echo "" + echo "Transcript assembly and quantification for RNA-Seq" + echo "" + echo "Input:" + echo " --strandedness" + echo " type: string" + echo " Forward or reverse strand?" + echo "" + echo " --bam" + echo " type: file, file must exist" + echo "" + echo " --annotation_gtf" + echo " type: file, file must exist" + echo "" + echo " --extra_stringtie_args" + echo " type: string" + echo " Extra arguments for running StringTie" + echo "" + echo " --stringtie_ignore_gtf" + echo " type: boolean" + echo " Perform reference-guided de novo assembly of transcripts using" + echo " StringTie, i.e. don't restrict to those in GTF file." + echo "" + echo "Output:" + echo " --transcript_gtf" + echo " type: file, output, file must exist" + echo " default: \$id.\$key.transcripts.gtf" + echo "" + echo " --coverage_gtf" + echo " type: file, output, file must exist" + echo " default: \$id.\$key.coverage.gtf" + echo "" + echo " --abundance" + echo " type: file, output, file must exist" + echo " default: \$id.\$key.abundance.txt" + echo "" + echo " --ballgown" + echo " type: file, output, file must exist" + echo " default: \$id.\$key.ballgown" + echo " for running ballgown" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. 
Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/summarizedexperiment/.config.vsh.yaml b/target/executable/summarizedexperiment/.config.vsh.yaml index dfcdff8..eed56ba 100644 --- a/target/executable/summarizedexperiment/.config.vsh.yaml +++ b/target/executable/summarizedexperiment/.config.vsh.yaml @@ -96,6 +96,9 @@ info: - "modules/local/summarizedexperiment/main.nf" last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -178,21 +181,17 @@ runners: engines: - type: "docker" id: "docker" - image: "ubuntu:22.04" + image: "rocker/r2u:22.04" target_registry: "images.viash-hub.com" target_tag: "main" namespace_separator: "/" setup: - - type: "apt" - packages: - - "r-base" - - "libcurl4-openssl-dev" - interactive: false - type: "r" bioc: - "SummarizedExperiment" - "tximeta" bioc_force_install: false + warnings_as_errors: true entrypoint: [] cmd: null - type: "native" @@ -203,15 +202,15 @@ build_info: engine: "docker|native" output: "target/executable/summarizedexperiment" executable: "target/executable/summarizedexperiment/summarizedexperiment" 
- viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -222,7 +221,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/summarizedexperiment/summarizedexperiment b/target/executable/summarizedexperiment/summarizedexperiment index dfeecc1..7e904ee 100755 --- a/target/executable/summarizedexperiment/summarizedexperiment +++ b/target/executable/summarizedexperiment/summarizedexperiment @@ -2,7 +2,7 @@ # summarizedexperiment main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -169,39 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "summarizedexperiment main" - echo "" - echo "Create SummarizedExperiment object from Salmon counts" - echo "" - echo "Input:" - echo " --tpm_gene" - echo " type: file, file must exist" - echo "" - echo " --counts_gene" - echo " type: file, file must exist" - echo "" - echo " --counts_gene_length_scaled" - echo " type: file, file must exist" - echo "" - echo " --counts_gene_scaled" - echo " type: file, file must exist" - echo "" - echo " --tpm_transcript" - echo " type: file, file must exist" - echo "" - echo " --counts_transcript" - echo " type: file, file must exist" - echo "" - echo " --tx2gene_tsv" - echo " type: file, file must exist" - echo "" - echo "Output:" - echo " --output" - echo " type: file, output, file must exist" - echo " default: merged_summarizedexperiment" -} # initialise variables VIASH_MODE='run' @@ -476,20 +443,16 @@ function ViashDockerfile { if [[ "$engine_id" == "docker" ]]; then cat << 'VIASHDOCKER' -FROM ubuntu:22.04 +FROM rocker/r2u:22.04 ENTRYPOINT [] -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y r-base libcurl4-openssl-dev && \ - rm -rf /var/lib/apt/lists/* - -RUN Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \ - Rscript -e 'if (!requireNamespace("SummarizedExperiment", quietly = TRUE)) BiocManager::install("SummarizedExperiment")' && \ - Rscript -e 'if (!requireNamespace("tximeta", quietly = TRUE)) BiocManager::install("tximeta")' +RUN Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("SummarizedExperiment", quietly = TRUE)) BiocManager::install("SummarizedExperiment")' && \ + Rscript -e 'options(warn = 2); if 
(!requireNamespace("tximeta", quietly = TRUE)) BiocManager::install("tximeta")' LABEL org.opencontainers.image.description="Companion container for running component summarizedexperiment" -LABEL org.opencontainers.image.created="2024-12-05T14:40:13Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:33Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -604,6 +567,65 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "summarizedexperiment main" + echo "" + echo "Create SummarizedExperiment object from Salmon counts" + echo "" + echo "Input:" + echo " --tpm_gene" + echo " type: file, file must exist" + echo "" + echo " --counts_gene" + echo " type: file, file must exist" + echo "" + echo " --counts_gene_length_scaled" + echo " type: file, file must exist" + echo "" + echo " --counts_gene_scaled" + echo " type: file, file must exist" + echo "" + echo " --tpm_transcript" + echo " type: file, file must exist" + echo "" + echo " --counts_transcript" + echo " type: file, file must exist" + echo "" + echo " --tx2gene_tsv" + echo " type: file, file must exist" + echo "" + echo "Output:" + echo " --output" + echo " type: file, output, file must exist" + echo " default: merged_summarizedexperiment" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." 
+ echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/tx2gene/.config.vsh.yaml b/target/executable/tx2gene/.config.vsh.yaml index 0fcdc6d..e4981da 100644 --- a/target/executable/tx2gene/.config.vsh.yaml +++ b/target/executable/tx2gene/.config.vsh.yaml @@ -84,6 +84,9 @@ info: - "modules/local/tx2gene/main.nf" last_sha: "839ac5cab892504514cc96d44e99e70516b239d2" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -189,15 +192,15 @@ build_info: engine: "docker|native" output: "target/executable/tx2gene" executable: "target/executable/tx2gene/tx2gene" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - 
path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -208,7 +211,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/tx2gene/tx2gene b/target/executable/tx2gene/tx2gene index e06aaab..9d17752 100755 --- a/target/executable/tx2gene/tx2gene +++ b/target/executable/tx2gene/tx2gene @@ -2,7 +2,7 @@ # tx2gene main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,37 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "tx2gene main" - echo "" - echo "Get transcript id (tx) to gene names for tximport" - echo "" - echo "Input:" - echo " --quant_results" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --gtf" - echo " type: file, file must exist" - echo "" - echo " --gtf_extra_attributes" - echo " type: string" - echo " default: gene_name" - echo "" - echo " --gtf_group_features" - echo " type: string" - echo " default: gene_id" - echo "" - echo " --quant_type" - echo " type: string" - echo " choices: [ salmon, kallisto ]" - echo " Method used for quantification" - echo "" - echo "Output:" - echo " --tsv" - echo " type: file, output, file must exist" - echo " default: tx2gene.tsv" -} # initialise variables VIASH_MODE='run' @@ -483,9 +452,9 @@ RUN apt-get update && \ RUN pip install --upgrade pip LABEL org.opencontainers.image.description="Companion container for running component tx2gene" -LABEL org.opencontainers.image.created="2024-12-05T14:40:11Z" -LABEL 
org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:30Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -600,6 +569,63 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "tx2gene main" + echo "" + echo "Get transcript id (tx) to gene names for tximport" + echo "" + echo "Input:" + echo " --quant_results" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --gtf" + echo " type: file, file must exist" + echo "" + echo " --gtf_extra_attributes" + echo " type: string" + echo " default: gene_name" + echo "" + echo " --gtf_group_features" + echo " type: string" + echo " default: gene_id" + echo "" + echo " --quant_type" + echo " type: string" + echo " choices: [ salmon, kallisto ]" + echo " Method used for quantification" + echo "" + echo "Output:" + echo " --tsv" + echo " type: file, output, file must exist" + echo " default: tx2gene.tsv" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." 
+ echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/tximport/.config.vsh.yaml b/target/executable/tximport/.config.vsh.yaml index 285fc77..f64d47c 100644 --- a/target/executable/tximport/.config.vsh.yaml +++ b/target/executable/tximport/.config.vsh.yaml @@ -143,6 +143,9 @@ info: - "modules/local/tximport/main.nf" last_sha: "489bcb4efdc7bd58839b22b0360d26b4d80b87a8" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -245,6 +248,7 @@ engines: - "tximport" - "tximeta" bioc_force_install: false + warnings_as_errors: true entrypoint: [] cmd: null - type: "native" @@ -255,15 +259,15 @@ build_info: engine: "docker|native" output: "target/executable/tximport" executable: "target/executable/tximport/tximport" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -274,7 +278,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: 
"0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/tximport/tximport b/target/executable/tximport/tximport index 96be464..500f6c8 100755 --- a/target/executable/tximport/tximport +++ b/target/executable/tximport/tximport @@ -2,7 +2,7 @@ # tximport main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,57 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "tximport main" - echo "" - echo "Get dataframe linking transcript ID, gene ID, and gene name" - echo "" - echo "Input:" - echo " --quant_results" - echo " type: file, multiple values allowed, file must exist" - echo "" - echo " --tx2gene_tsv" - echo " type: file, file must exist" - echo "" - echo " --quant_type" - echo " type: string" - echo " choices: [ salmon, kallisto ]" - echo " Method used for quantification" - echo "" - echo "Output:" - echo " --tpm_gene" - echo " type: file, output, file must exist" - echo " default: merged.gene_tpm.tsv" - echo "" - echo " --counts_gene" - echo " type: file, output, file must exist" - echo " default: merged.gene_counts.tsv" - echo "" - echo " --counts_gene_length_scaled" - echo " type: file, output, file must exist" - echo " default: merged.gene_counts_length_scaled.tsv" - echo "" - echo " --counts_gene_scaled" - echo " type: file, output, file must exist" - echo " default: merged.gene_counts_scaled.tsv" - echo "" - echo " --lengths_gene" - echo " type: file, output, file must exist" - echo " default: merged.gene_lengths.tsv" - echo "" - echo " --tpm_transcript" - echo " type: file, output, file must exist" - echo " default: merged.transcript_tpm.tsv" - echo "" - 
echo " --counts_transcript" - echo " type: file, output, file must exist" - echo " default: merged.transcript_counts.tsv" - echo "" - echo " --lengths_transcript" - echo " type: file, output, file must exist" - echo " default: merged.transcript_lengths.tsv" -} # initialise variables VIASH_MODE='run' @@ -500,17 +449,17 @@ RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y r-base libcurl4-openssl-dev libssl-dev libxml2-dev && \ rm -rf /var/lib/apt/lists/* -RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \ - Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \ - Rscript -e 'if (!requireNamespace("SummarizedExperiment", quietly = TRUE)) BiocManager::install("SummarizedExperiment")' && \ - Rscript -e 'if (!requireNamespace("tximport", quietly = TRUE)) BiocManager::install("tximport")' && \ - Rscript -e 'if (!requireNamespace("tximeta", quietly = TRUE)) BiocManager::install("tximeta")' && \ - Rscript -e 'remotes::install_cran(c("jsonlite"), repos = "https://cran.rstudio.com")' +RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("SummarizedExperiment", quietly = TRUE)) BiocManager::install("SummarizedExperiment")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("tximport", quietly = TRUE)) BiocManager::install("tximport")' && \ + Rscript -e 'options(warn = 2); if (!requireNamespace("tximeta", quietly = TRUE)) BiocManager::install("tximeta")' && \ + Rscript -e 'options(warn = 2); remotes::install_cran(c("jsonlite"), repos = "https://cran.rstudio.com")' LABEL org.opencontainers.image.description="Companion container for running component tximport" -LABEL 
org.opencontainers.image.created="2024-12-05T14:40:12Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:29Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -625,6 +574,83 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "tximport main" + echo "" + echo "Get dataframe linking transcript ID, gene ID, and gene name" + echo "" + echo "Input:" + echo " --quant_results" + echo " type: file, multiple values allowed, file must exist" + echo "" + echo " --tx2gene_tsv" + echo " type: file, file must exist" + echo "" + echo " --quant_type" + echo " type: string" + echo " choices: [ salmon, kallisto ]" + echo " Method used for quantification" + echo "" + echo "Output:" + echo " --tpm_gene" + echo " type: file, output, file must exist" + echo " default: merged.gene_tpm.tsv" + echo "" + echo " --counts_gene" + echo " type: file, output, file must exist" + echo " default: merged.gene_counts.tsv" + echo "" + echo " --counts_gene_length_scaled" + echo " type: file, output, file must exist" + echo " default: merged.gene_counts_length_scaled.tsv" + echo "" + echo " --counts_gene_scaled" + echo " type: file, output, file must exist" + echo " default: merged.gene_counts_scaled.tsv" + echo "" + echo " --lengths_gene" + echo " type: file, output, file must exist" + echo " default: merged.gene_lengths.tsv" + echo "" + echo " --tpm_transcript" + echo " type: file, output, file must exist" + echo " default: merged.transcript_tpm.tsv" + echo "" + echo " --counts_transcript" + echo " 
type: file, output, file must exist" + echo " default: merged.transcript_counts.tsv" + echo "" + echo " --lengths_transcript" + echo " type: file, output, file must exist" + echo " default: merged.transcript_lengths.tsv" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/ucsc/bedclip/.config.vsh.yaml b/target/executable/ucsc/bedclip/.config.vsh.yaml index b9e743f..d71d3b5 100644 --- a/target/executable/ucsc/bedclip/.config.vsh.yaml +++ b/target/executable/ucsc/bedclip/.config.vsh.yaml @@ -62,6 +62,9 @@ info: - "modules/nf-core/ucsc/bedclip/meta.yml" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -168,15 +171,15 @@ build_info: engine: "docker|native" output: "target/executable/ucsc/bedclip" executable: "target/executable/ucsc/bedclip/bedclip" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -187,7 +190,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/ucsc/bedclip/bedclip b/target/executable/ucsc/bedclip/bedclip index 063101b..b411493 100755 --- a/target/executable/ucsc/bedclip/bedclip +++ b/target/executable/ucsc/bedclip/bedclip @@ -2,7 +2,7 @@ # bedclip main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -169,27 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bedclip main" - echo "" - echo "Remove lines from bed file that refer to off-chromosome locations" - echo "" - echo "Input:" - echo " --input_bedgraph" - echo " type: file, file must exist" - echo " bedGraph file which should be converted" - echo "" - echo " --sizes" - echo " type: file, file must exist" - echo " File with chromosome sizes" - echo "" - echo "Output:" - echo " --output_bedgraph" - echo " type: file, output, file must exist" - echo " default: \$id.\$key.bedgraph" - echo " bedGraph file after clipping" -} # initialise variables VIASH_MODE='run' @@ -473,9 +452,9 @@ RUN apt-get update && \ RUN rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedClip /usr/local/bin/ LABEL org.opencontainers.image.description="Companion container for running component ucsc bedclip" -LABEL org.opencontainers.image.created="2024-12-05T14:40:11Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:32Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -590,6 +569,53 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedclip main" + echo "" + echo "Remove lines from bed file that refer to off-chromosome locations" + echo "" + echo "Input:" + echo " --input_bedgraph" + echo " type: file, file must exist" + echo " bedGraph file which should be 
converted" + echo "" + echo " --sizes" + echo " type: file, file must exist" + echo " File with chromosome sizes" + echo "" + echo "Output:" + echo " --output_bedgraph" + echo " type: file, output, file must exist" + echo " default: \$id.\$key.bedgraph" + echo " bedGraph file after clipping" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/ucsc/bedgraphtobigwig/.config.vsh.yaml b/target/executable/ucsc/bedgraphtobigwig/.config.vsh.yaml index 5deba06..a6fc0f3 100644 --- a/target/executable/ucsc/bedgraphtobigwig/.config.vsh.yaml +++ b/target/executable/ucsc/bedgraphtobigwig/.config.vsh.yaml @@ -62,6 +62,9 @@ info: - "modules/nf-core/ucsc/bedgraphtobigwig/meta.yml" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -168,15 +171,15 @@ build_info: engine: "docker|native" output: "target/executable/ucsc/bedgraphtobigwig" executable: "target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -187,7 +190,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig b/target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig index d4fa00f..77c5463 100755 --- a/target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig +++ b/target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig @@ -2,7 +2,7 @@ # bedgraphtobigwig main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,27 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bedgraphtobigwig main" - echo "" - echo "Convert a bedGraph file to bigWig format" - echo "" - echo "Input:" - echo " --bedgraph" - echo " type: file, file must exist" - echo " bedGraph file which should be converted" - echo "" - echo " --sizes" - echo " type: file, file must exist" - echo " File with chromosome sizes" - echo "" - echo "Output:" - echo " --bigwig" - echo " type: file, output, file must exist" - echo " default: \$id.\$key.bigwig" - echo " bigWig coverage file relative to genes on the input file" -} # initialise variables VIASH_MODE='run' @@ -473,9 +452,9 @@ RUN apt-get update && \ RUN rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedGraphToBigWig /usr/local/bin/ LABEL org.opencontainers.image.description="Companion container for running component ucsc bedgraphtobigwig" -LABEL org.opencontainers.image.created="2024-12-05T14:40:11Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="2b3d511b34246648b934fd1dc99b22e0a71c37f2" +LABEL org.opencontainers.image.created="2025-05-06T19:18:31Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="f52978a0e25cae182b7874b4b8aa3afc183e880e" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -590,6 +569,53 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bedgraphtobigwig main" + echo "" + echo "Convert a bedGraph file to bigWig format" + echo "" + echo "Input:" + echo " --bedgraph" + echo " type: 
file, file must exist" + echo " bedGraph file which should be converted" + echo "" + echo " --sizes" + echo " type: file, file must exist" + echo " File with chromosome sizes" + echo "" + echo "Output:" + echo " --bigwig" + echo " type: file, output, file must exist" + echo " default: \$id.\$key.bigwig" + echo " bigWig coverage file relative to genes on the input file" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/workflows/genome_alignment_and_quant/.config.vsh.yaml b/target/executable/workflows/genome_alignment_and_quant/.config.vsh.yaml index 453a60e..08093b6 100644 --- a/target/executable/workflows/genome_alignment_and_quant/.config.vsh.yaml +++ b/target/executable/workflows/genome_alignment_and_quant/.config.vsh.yaml @@ -455,6 +455,9 @@ description: "A viash sub-workflow for genome alignment and quantification stage \ nf-core/rnaseq pipeline.\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -594,9 +597,9 @@ build_info: engine: "native" output: "target/executable/workflows/genome_alignment_and_quant" executable: "target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_align_reads" - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort" @@ -613,7 +616,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -624,7 +627,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant b/target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant index b3a7c32..f15924b 100755 --- 
a/target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant +++ b/target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant @@ -2,7 +2,7 @@ # genome_alignment_and_quant main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,6 +169,12 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "genome_alignment_and_quant main" @@ -363,12 +369,19 @@ function ViashHelp { echo " type: file, output, file must exist" echo " default: \$id.transcript.bam" echo " Transcript BAM file (optional)" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." + echo " Default: native" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='native' - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/workflows/merge_quant_results/.config.vsh.yaml b/target/executable/workflows/merge_quant_results/.config.vsh.yaml index c3dad7b..15e45fe 100644 --- a/target/executable/workflows/merge_quant_results/.config.vsh.yaml +++ b/target/executable/workflows/merge_quant_results/.config.vsh.yaml @@ -184,6 +184,9 @@ description: "A sub-workflow to merge the counts obtained from salmon quant acro \ all samples." 
info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -282,9 +285,9 @@ build_info: engine: "native" output: "target/executable/workflows/merge_quant_results" executable: "target/executable/workflows/merge_quant_results/merge_quant_results" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/tx2gene" - "target/nextflow/tximport" @@ -294,7 +297,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -305,7 +308,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/workflows/merge_quant_results/merge_quant_results b/target/executable/workflows/merge_quant_results/merge_quant_results index 80bbd94..8f8a122 100755 --- a/target/executable/workflows/merge_quant_results/merge_quant_results +++ b/target/executable/workflows/merge_quant_results/merge_quant_results @@ -2,7 +2,7 @@ # merge_quant_results main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -169,6 +169,12 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "merge_quant_results main" @@ -239,12 +245,19 @@ function ViashHelp { echo " --quant_merged_summarizedexperiment" echo " type: file, output, file must exist" echo " example: quant_merged_summarizedexperiment" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." + echo " Default: native" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='native' - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/workflows/post_processing/.config.vsh.yaml b/target/executable/workflows/post_processing/.config.vsh.yaml index f4b8235..77d9016 100644 --- a/target/executable/workflows/post_processing/.config.vsh.yaml +++ b/target/executable/workflows/post_processing/.config.vsh.yaml @@ -350,6 +350,9 @@ description: "A viash sub-workflow for the post-processing stage of nf-core/rnas \ pipeline.\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -481,9 +484,9 @@ build_info: engine: "native" output: "target/executable/workflows/post_processing" executable: "target/executable/workflows/post_processing/post_processing" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" 
dependencies: - "target/nextflow/picard_markduplicates" - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort" @@ -500,7 +503,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -511,7 +514,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/workflows/post_processing/post_processing b/target/executable/workflows/post_processing/post_processing index 85ca3e6..58e0426 100755 --- a/target/executable/workflows/post_processing/post_processing +++ b/target/executable/workflows/post_processing/post_processing @@ -2,7 +2,7 @@ # post_processing main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,6 +169,12 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "post_processing main" @@ -316,12 +322,19 @@ function ViashHelp { echo " --bigwig_reverse" echo " type: file, output, file must exist" echo " default: \$id.reverse.bigwig" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." 
+ echo " Default: native" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='native' - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/workflows/pre_processing/.config.vsh.yaml b/target/executable/workflows/pre_processing/.config.vsh.yaml index ac28919..2eb8ac6 100644 --- a/target/executable/workflows/pre_processing/.config.vsh.yaml +++ b/target/executable/workflows/pre_processing/.config.vsh.yaml @@ -507,6 +507,9 @@ resources: description: "A subworkflow for the pre-processing stage of the nf-core/rnaseq pipeline.\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -636,9 +639,9 @@ build_info: engine: "native" output: "target/executable/workflows/pre_processing" executable: "target/executable/workflows/pre_processing/pre_processing" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc" - "target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract" @@ -653,7 +656,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -664,7 +667,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/workflows/pre_processing/pre_processing b/target/executable/workflows/pre_processing/pre_processing index cf025d8..122b726 100755 --- a/target/executable/workflows/pre_processing/pre_processing +++ b/target/executable/workflows/pre_processing/pre_processing 
@@ -2,7 +2,7 @@ # pre_processing main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,6 +169,12 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "pre_processing main" @@ -399,12 +405,19 @@ function ViashHelp { echo " --merged_out" echo " type: file, output, file must exist" echo " File name to store merged fastp output." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." 
+ echo " Default: native" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='native' - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/workflows/prepare_genome/.config.vsh.yaml b/target/executable/workflows/prepare_genome/.config.vsh.yaml index 18455ef..8221018 100644 --- a/target/executable/workflows/prepare_genome/.config.vsh.yaml +++ b/target/executable/workflows/prepare_genome/.config.vsh.yaml @@ -364,6 +364,9 @@ resources: description: "A subworkflow for preparing all the required genome references\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -506,9 +509,9 @@ build_info: engine: "native" output: "target/executable/workflows/prepare_genome" executable: "target/executable/workflows/prepare_genome/prepare_genome" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/gunzip" - "target/dependencies/vsh/vsh/biobox/main/nextflow/gffread" @@ -528,7 +531,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -539,7 +542,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/workflows/prepare_genome/prepare_genome b/target/executable/workflows/prepare_genome/prepare_genome index 848aec9..075ef17 100755 --- a/target/executable/workflows/prepare_genome/prepare_genome +++ b/target/executable/workflows/prepare_genome/prepare_genome @@ -2,7 +2,7 @@ # prepare_genome main # -# This wrapper script 
is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,6 +169,12 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "prepare_genome main" @@ -327,12 +333,19 @@ function ViashHelp { echo " type: file, output, file must exist" echo " default: reference_genome.fasta.fai" echo " FASTA index file" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." 
+ echo " Default: native" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='native' - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/workflows/pseudo_alignment_and_quant/.config.vsh.yaml b/target/executable/workflows/pseudo_alignment_and_quant/.config.vsh.yaml index 0b217b3..0db2462 100644 --- a/target/executable/workflows/pseudo_alignment_and_quant/.config.vsh.yaml +++ b/target/executable/workflows/pseudo_alignment_and_quant/.config.vsh.yaml @@ -190,6 +190,9 @@ description: "A viash sub-workflow for pseudo alignment and quantification stage \ nf-core/rnaseq pipeline.\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -289,9 +292,9 @@ build_info: engine: "native" output: "target/executable/workflows/pseudo_alignment_and_quant" executable: "target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant" - "target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant" @@ -300,7 +303,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -311,7 +314,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant b/target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant index 
b6e3d82..702b69c 100755 --- a/target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant +++ b/target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant @@ -2,7 +2,7 @@ # pseudo_alignment_and_quant main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,6 +169,12 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "pseudo_alignment_and_quant main" @@ -251,12 +257,19 @@ function ViashHelp { echo " --kallisto_quant_results_file" echo " type: file, output, file must exist" echo " default: \$id.abundance.tsv" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." 
+ echo " Default: native" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='native' - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/workflows/quality_control/.config.vsh.yaml b/target/executable/workflows/quality_control/.config.vsh.yaml index 9b0d90b..4ef73bf 100644 --- a/target/executable/workflows/quality_control/.config.vsh.yaml +++ b/target/executable/workflows/quality_control/.config.vsh.yaml @@ -1405,6 +1405,9 @@ description: "A subworkflow for the final quality control stage of the nf-core/r \ pipeline.\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -1560,9 +1563,9 @@ build_info: engine: "native" output: "target/executable/workflows/quality_control" executable: "target/executable/workflows/quality_control/quality_control" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat" - "target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment" @@ -1587,7 +1590,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -1598,7 +1601,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/workflows/quality_control/quality_control b/target/executable/workflows/quality_control/quality_control index 40f5031..fcd03f6 100755 --- a/target/executable/workflows/quality_control/quality_control +++ 
b/target/executable/workflows/quality_control/quality_control @@ -2,7 +2,7 @@ # quality_control main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,6 +169,12 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "quality_control main" @@ -797,12 +803,19 @@ function ViashHelp { echo " --pseudo_quant_merged_summarizedexperiment" echo " type: file, output, file must exist" echo " default: pseudo_quant_merged_summarizedexperiment" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." 
+ echo " Default: native" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='native' - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/workflows/rnaseq/.config.vsh.yaml b/target/executable/workflows/rnaseq/.config.vsh.yaml index 96a58fa..0020664 100644 --- a/target/executable/workflows/rnaseq/.config.vsh.yaml +++ b/target/executable/workflows/rnaseq/.config.vsh.yaml @@ -1948,6 +1948,9 @@ test_resources: entrypoint: "test_wf" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -2058,9 +2061,9 @@ build_info: engine: "native" output: "target/executable/workflows/rnaseq" executable: "target/executable/workflows/rnaseq/rnaseq" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/workflows/prepare_genome" - "target/nextflow/cat_fastq" @@ -2074,7 +2077,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -2085,7 +2088,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/executable/workflows/rnaseq/rnaseq b/target/executable/workflows/rnaseq/rnaseq index 9224adf..e7c0c26 100755 --- a/target/executable/workflows/rnaseq/rnaseq +++ b/target/executable/workflows/rnaseq/rnaseq @@ -2,7 +2,7 @@ # rnaseq main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative # work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,6 +169,12 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='native' + + # ViashHelp: Display helpful explanation about this executable function ViashHelp { echo "rnaseq main" @@ -1025,12 +1031,19 @@ function ViashHelp { echo " type: file, output, file must exist" echo " default:" echo "pseudo_alignment_quantification/quant_merged_summarizedexperiment" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: native." + echo " Default: native" } -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='native' - # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/nextflow/bedtools_genomecov/.config.vsh.yaml b/target/nextflow/bedtools_genomecov/.config.vsh.yaml index 927ae6f..2887102 100644 --- a/target/nextflow/bedtools_genomecov/.config.vsh.yaml +++ b/target/nextflow/bedtools_genomecov/.config.vsh.yaml @@ -80,6 +80,9 @@ info: - "modules/local/bedtools_genomecov.nf" last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -182,15 +185,15 @@ build_info: engine: "docker|native" output: "target/nextflow/bedtools_genomecov" executable: "target/nextflow/bedtools_genomecov/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: 
name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -201,7 +204,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/bedtools_genomecov/main.nf b/target/nextflow/bedtools_genomecov/main.nf index 4843411..22fb271 100644 --- a/target/nextflow/bedtools_genomecov/main.nf +++ b/target/nextflow/bedtools_genomecov/main.nf @@ -1,6 +1,6 @@ // bedtools_genomecov main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument 
'${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? 
+ publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2915,6 +3146,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3038,9 +3273,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/bedtools_genomecov", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3048,7 +3283,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3067,7 +3302,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/bedtools_genomecov/nextflow_schema.json b/target/nextflow/bedtools_genomecov/nextflow_schema.json index 311b6f8..4a4a0b3 100644 --- a/target/nextflow/bedtools_genomecov/nextflow_schema.json +++ b/target/nextflow/bedtools_genomecov/nextflow_schema.json @@ -60,10 +60,10 @@ "bedgraph_forward": { "type": "string", - "description": "Type: `file`, default: 
`$id.$key.bedgraph_forward.bedgraph`. ", - "help_text": "Type: `file`, default: `$id.$key.bedgraph_forward.bedgraph`. " + "description": "Type: `file`, default: `$id.forward.bedgraph`. ", + "help_text": "Type: `file`, default: `$id.forward.bedgraph`. " , - "default":"$id.$key.bedgraph_forward.bedgraph" + "default":"$id.forward.bedgraph" } @@ -71,10 +71,10 @@ "bedgraph_reverse": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bedgraph_reverse.bedgraph`. ", - "help_text": "Type: `file`, default: `$id.$key.bedgraph_reverse.bedgraph`. " + "description": "Type: `file`, default: `$id.reverse.bedgraph`. ", + "help_text": "Type: `file`, default: `$id.reverse.bedgraph`. " , - "default":"$id.$key.bedgraph_reverse.bedgraph" + "default":"$id.reverse.bedgraph" } diff --git a/target/nextflow/cat_additional_fasta/.config.vsh.yaml b/target/nextflow/cat_additional_fasta/.config.vsh.yaml index cddda7c..7435d06 100644 --- a/target/nextflow/cat_additional_fasta/.config.vsh.yaml +++ b/target/nextflow/cat_additional_fasta/.config.vsh.yaml @@ -90,6 +90,9 @@ info: - "modules/local/cat_additional_fasta.nf" last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -186,15 +189,15 @@ build_info: engine: "docker|native" output: "target/nextflow/cat_additional_fasta" executable: "target/nextflow/cat_additional_fasta/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -205,7 +208,7 @@ 
package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/cat_additional_fasta/main.nf b/target/nextflow/cat_additional_fasta/main.nf index bf7a1f3..b6229a1 100644 --- a/target/nextflow/cat_additional_fasta/main.nf +++ b/target/nextflow/cat_additional_fasta/main.nf @@ -1,6 +1,6 @@ // cat_additional_fasta main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in 
module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2924,6 +3155,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3039,9 +3274,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/cat_additional_fasta", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3049,7 +3284,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3068,7 +3303,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/cat_additional_fasta/nextflow_schema.json b/target/nextflow/cat_additional_fasta/nextflow_schema.json index 61a5eee..7eef229 100644 --- a/target/nextflow/cat_additional_fasta/nextflow_schema.json +++ b/target/nextflow/cat_additional_fasta/nextflow_schema.json @@ -67,10 +67,10 @@ "fasta_output": { "type": "string", - "description": "Type: `file`, default: 
`$id.$key.fasta_output.fasta_output`. Concatenated FASTA file", - "help_text": "Type: `file`, default: `$id.$key.fasta_output.fasta_output`. Concatenated FASTA file." + "description": "Type: `file`, default: `$id.$key.fasta_output`. Concatenated FASTA file", + "help_text": "Type: `file`, default: `$id.$key.fasta_output`. Concatenated FASTA file." , - "default":"$id.$key.fasta_output.fasta_output" + "default":"$id.$key.fasta_output" } @@ -78,10 +78,10 @@ "gtf_output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.gtf_output.gtf_output`. Concatenated GTF file", - "help_text": "Type: `file`, default: `$id.$key.gtf_output.gtf_output`. Concatenated GTF file." + "description": "Type: `file`, default: `$id.$key.gtf_output`. Concatenated GTF file", + "help_text": "Type: `file`, default: `$id.$key.gtf_output`. Concatenated GTF file." , - "default":"$id.$key.gtf_output.gtf_output" + "default":"$id.$key.gtf_output" } diff --git a/target/nextflow/cat_fastq/.config.vsh.yaml b/target/nextflow/cat_fastq/.config.vsh.yaml index e6f343c..35afd9a 100644 --- a/target/nextflow/cat_fastq/.config.vsh.yaml +++ b/target/nextflow/cat_fastq/.config.vsh.yaml @@ -77,6 +77,9 @@ info: - "modules/nf-core/cat/fastq/meta.yml" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -173,15 +176,15 @@ build_info: engine: "docker|native" output: "target/nextflow/cat_fastq" executable: "target/nextflow/cat_fastq/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: 
"gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -192,7 +195,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/cat_fastq/main.nf b/target/nextflow/cat_fastq/main.nf index 030df54..265eda5 100644 --- a/target/nextflow/cat_fastq/main.nf +++ b/target/nextflow/cat_fastq/main.nf @@ -1,6 +1,6 @@ // cat_fastq main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && 
it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2915,6 +3146,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3030,9 +3265,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/cat_fastq", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3040,7 +3275,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3059,7 +3294,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/cat_fastq/nextflow_schema.json b/target/nextflow/cat_fastq/nextflow_schema.json index af6d77a..6698d61 100644 --- a/target/nextflow/cat_fastq/nextflow_schema.json +++ b/target/nextflow/cat_fastq/nextflow_schema.json @@ -47,10 +47,10 @@ "fastq_1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fastq_1.fastq`. 
Concatenated read 1 fastq", - "help_text": "Type: `file`, default: `$id.$key.fastq_1.fastq`. Concatenated read 1 fastq" + "description": "Type: `file`, default: `$id_r1.fastq`. Concatenated read 1 fastq", + "help_text": "Type: `file`, default: `$id_r1.fastq`. Concatenated read 1 fastq" , - "default":"$id.$key.fastq_1.fastq" + "default":"$id_r1.fastq" } @@ -58,10 +58,10 @@ "fastq_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fastq_2.fastq`. Concatenated read 2 fastq", - "help_text": "Type: `file`, default: `$id.$key.fastq_2.fastq`. Concatenated read 2 fastq" + "description": "Type: `file`, default: `$id_r2.fastq`. Concatenated read 2 fastq", + "help_text": "Type: `file`, default: `$id_r2.fastq`. Concatenated read 2 fastq" , - "default":"$id.$key.fastq_2.fastq" + "default":"$id_r2.fastq" } diff --git a/target/nextflow/copy_if_exists/.config.vsh.yaml b/target/nextflow/copy_if_exists/.config.vsh.yaml new file mode 100644 index 0000000..10659ab --- /dev/null +++ b/target/nextflow/copy_if_exists/.config.vsh.yaml @@ -0,0 +1,183 @@ +name: "copy_if_exists" +version: "main" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--required_file" + info: null + example: + - "/tmp/rnaseq_workflow_config/required_file.txt" + must_exist: false + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--optional_file" + info: null + example: + - "/tmp/rnaseq_workflow_config/optional_file.txt" + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Ouput" + arguments: + - type: "file" + name: "--output" + info: null + default: + - "copy_if_exists_output" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "required_file.txt" +- type: 
"file" + path: "optional_file.txt" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +repositories: +- type: "vsh" + name: "biobox" + repo: "biobox" + tag: "main" +- type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 
140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "main" + namespace_separator: "/" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/extra/copy_if_exists/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/copy_if_exists" + executable: "target/nextflow/copy_if_exists/main.nf" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" +package_config: + name: "rnaseq" + version: "main" + info: + test_resources: + - path: "gs://viash-hub-resources/rnaseq/v1" + dest: "testData" + repositories: + - type: "vsh" + name: "biobox" + repo: "biobox" + tag: "main" + - type: "vsh" + name: "craftbox" + repo: "craftbox" + tag: "v0.1.0" + viash_version: "0.9.2" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config',\ + \ dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'main'" + organization: "vsh" diff --git a/target/nextflow/copy_if_exists/main.nf b/target/nextflow/copy_if_exists/main.nf new file mode 100644 index 0000000..75c67cd --- /dev/null +++ 
b/target/nextflow/copy_if_exists/main.nf @@ -0,0 +1,3813 @@ +// copy_if_exists main +// +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. 
+ * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value instanceof String) { + try { + value = value.toInteger() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigInteger) { + value = value.intValue() + } + expectedClass = value instanceof Integer ? 
null : "Integer" + } else if (par.type == "long") { + // cast to long if need be + if (value instanceof String) { + try { + value = value.toLong() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof Integer) { + value = value.toLong() + } + expectedClass = value instanceof Long ? null : "Long" + } else if (par.type == "double") { + // cast to double if need be + if (value instanceof String) { + try { + value = value.toDouble() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigDecimal) { + value = value.doubleValue() + } + if (value instanceof Float) { + value = value.toDouble() + } + expectedClass = value instanceof Double ? null : "Double" + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value instanceof String) { + def valueLower = value.toLowerCase() + if (valueLower == "true") { + value = true + } else if (valueLower == "false") { + value = false + } + } + expectedClass = value instanceof Boolean ? null : "Boolean" + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? 
null : "String" + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert 
outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. 
+ */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." 
+ } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. 
+ */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. 
A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? 
+ params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. 
Can optionally contain a 'param_list' key that
 *   provides a list of arguments that can be split up into multiple events
 *   in the output channel. Possible formats of param_list are: a csv file,
 *   json file, a yaml file or a yaml blob. Each parameter set (event) must
 *   have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *   using the readConfig() function.
 *
 * @return A nextflow Channel with events. Events are formatted as a tuple whose
 *   first element is the ID of the event and whose second element holds a parameter map.
 */
def channelFromParams(Map params, Map config) {
  def processedParams = _paramsToParamSets(params, config)
  return Channel.fromList(processedParams)
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf'
/**
 * Build a `filter` step that rejects events whose id (first tuple element)
 * was already seen.
 *
 * @param args Options map. `stopOnError` (default: true): when true a duplicate
 *   id raises an error; when false the duplicate is logged as a warning and dropped.
 *
 * @return The result of `filter { ... }` with the duplicate check as predicate.
 */
def checkUniqueIds(Map args) {
  // Honour an explicitly passed value and default to true otherwise.
  // (The condition used to be inverted: an omitted option produced null and
  // an explicit `false` was turned into `true`.)
  def stopOnError = args.stopOnError != null ? args.stopOnError : true

  def idChecker = new IDChecker()

  return filter { tup ->
    // observe() is expected to return a falsy value for an id it has already recorded
    if (!idChecker.observe(tup[0])) {
      if (stopOnError) {
        error "Duplicate id: ${tup[0]}"
      } else {
        log.warn "Duplicate id: ${tup[0]}, removing duplicate entry"
        return false
      }
    }
    return true
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf'
// This helper file will be deprecated soon
// Script-level flag so the deprecation warning is printed at most once.
preprocessInputsDeprecationWarningPrinted = false

/** Print the preprocessInputs() deprecation warning to stderr on first call only. */
def preprocessInputsDeprecationWarning() {
  if (!preprocessInputsDeprecationWarningPrinted) {
    preprocessInputsDeprecationWarningPrinted = true
    System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.")
  }
}

/**
 * Generate a nextflow Workflow that allows processing a channel of
 * Vdsl3 formatted events and apply a Viash config to them:
 *    - Gather default parameters from the Viash config and make
 *      sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. 
 *
 * @param components: list of Viash VDSL3 modules to run
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component config.
 * @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component config.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component config.
 * @param id: id to use for the output data
 *   If a closure, it will be called with the id, the data and the component config.
 * @param auto: auto options to pass to the components
 *
 * @return: a workflow that runs the components
 **/
def runComponents(Map args) {
  log.warn("runComponents is deprecated, use runEach instead")
  assert args.components: "runComponents should be passed a list of components to run"

  // accept a single component as well as a list
  def components_ = args.components
  if (components_ !instanceof List) {
    components_ = [ components_ ]
  }
  assert components_.size() > 0: "pass at least one component to runComponents"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def id_ = args.id

  workflow runComponentsWf {
    take: input_ch
    main:

    // generate one channel per method
    out_chs = components_.collect{ comp_ ->
      def comp_config = comp_.config

      // optional per-component filter on (id, data, config)
      def filter_ch = filter_
        ? input_ch | filter{tup ->
          filter_(tup[0], tup[1], comp_config)
        }
        : input_ch
      // optional id rewrite: a String replaces the id, a Closure computes it
      def id_ch = id_
        ? filter_ch | map{tup ->
          // def new_id = id_(tup[0], tup[1], comp_config)
          def new_id = tup[0]
          if (id_ instanceof String) {
            new_id = id_
          } else if (id_ instanceof Closure) {
            new_id = id_(new_id, tup[1], comp_config)
          }
          [new_id] + tup.drop(1)
        }
        : filter_ch
      // build the component input and insert it at position 1, so the original
      // state moves to position 2 (read back as old_state below)
      def data_ch = id_ch | map{tup ->
          def new_data = tup[1]
          if (fromState_ instanceof Map) {
            new_data = fromState_.collectEntries{ key0, key1 ->
              [key0, new_data[key1]]
            }
          } else if (fromState_ instanceof List) {
            new_data = fromState_.collectEntries{ key ->
              [key, new_data[key]]
            }
          } else if (fromState_ instanceof Closure) {
            new_data = fromState_(tup[0], new_data, comp_config)
          }
          tup.take(1) + [new_data] + tup.drop(1)
        }
      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )
      // merge the component output back into the old state when toState is given
      def post_ch = toState_
        ? out_ch | map{tup ->
          def output = tup[1]
          def old_state = tup[2]
          def new_state = null
          if (toState_ instanceof Map) {
            new_state = old_state + toState_.collectEntries{ key0, key1 ->
              [key0, output[key1]]
            }
          } else if (toState_ instanceof List) {
            new_state = old_state + toState_.collectEntries{ key ->
              [key, output[key]]
            }
          } else if (toState_ instanceof Closure) {
            new_state = toState_(tup[0], output, old_state, comp_config)
          }
          [tup[0], new_state] + tup.drop(3)
        }
        : out_ch

      post_ch
    }

    // mix all results
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runComponentsWf
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf'
/**
 * Run a list of components on a stream of data.
 *
 * @param components: list of Viash VDSL3 modules to run
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component itself.
 * @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component itself.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component itself.
 * @param runIf: optional closure called with the id, the data and the component itself.
 *   Events for which it returns a falsy value skip the component and are appended to the output unchanged.
 * @param id: id to use for the output data
 *   If a closure, it will be called with the id, the data and the component itself.
 * @param auto: auto options to pass to the components
 *
 * @return: a workflow that runs the components
 **/
def runEach(Map args) {
  assert args.components: "runEach should be passed a list of components to run"

  // accept a single component as well as a list
  def components_ = args.components
  if (components_ !instanceof List) {
    components_ = [ components_ ]
  }
  assert components_.size() > 0: "pass at least one component to runEach"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def runIf_ = args.runIf
  def id_ = args.id

  assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf."

  workflow runEachWf {
    take: input_ch
    main:

    // generate one channel per method
    out_chs = components_.collect{ comp_ ->
      // optional per-component filter on (id, data, component)
      def filter_ch = filter_
        ? input_ch | filter{tup ->
          filter_(tup[0], tup[1], comp_)
        }
        : input_ch
      // optional id rewrite; the resulting id must be a java.lang.String
      def id_ch = id_
        ? filter_ch | map{tup ->
          def new_id = id_
          if (new_id instanceof Closure) {
            new_id = new_id(tup[0], tup[1], comp_)
          }
          assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}"
          [new_id] + tup.drop(1)
        }
        : filter_ch
      // split into events that run the component and events that bypass it
      def chPassthrough = null
      def chRun = null
      if (runIf_) {
        def idRunIfBranch = id_ch.branch{ tup ->
          run: runIf_(tup[0], tup[1], comp_)
          passthrough: true
        }
        chPassthrough = idRunIfBranch.passthrough
        chRun = idRunIfBranch.run
      } else {
        chRun = id_ch
        chPassthrough = Channel.empty()
      }
      // build the component input and insert it at position 1, so the original
      // state moves to position 2 (read back as old_state below)
      def data_ch = chRun | map{tup ->
          def new_data = tup[1]
          if (fromState_ instanceof Map) {
            new_data = fromState_.collectEntries{ key0, key1 ->
              [key0, new_data[key1]]
            }
          } else if (fromState_ instanceof List) {
            new_data = fromState_.collectEntries{ key ->
              [key, new_data[key]]
            }
          } else if (fromState_ instanceof Closure) {
            new_data = fromState_(tup[0], new_data, comp_)
          }
          tup.take(1) + [new_data] + tup.drop(1)
        }
      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )
      // merge the component output back into the old state when toState is given
      def post_ch = toState_
        ? out_ch | map{tup ->
          def output = tup[1]
          def old_state = tup[2]
          def new_state = null
          if (toState_ instanceof Map) {
            new_state = old_state + toState_.collectEntries{ key0, key1 ->
              [key0, output[key1]]
            }
          } else if (toState_ instanceof List) {
            new_state = old_state + toState_.collectEntries{ key ->
              [key, output[key]]
            }
          } else if (toState_ instanceof Closure) {
            new_state = toState_(tup[0], output, old_state, comp_)
          }
          [tup[0], new_state] + tup.drop(3)
        }
        : out_ch

      // bypassed events are appended after the processed ones
      def return_ch = post_ch
        | concat(chPassthrough)

      return_ch
    }

    // mix all results
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runEachWf
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf'
/**
 * Join sourceChannel to targetChannel
 *
 * This function joins the sourceChannel to the targetChannel.
 * However, each id in the targetChannel must be present in the
 * sourceChannel.
If _meta.join_id exists in the targetChannel, that is
 * used as an id instead. If the id doesn't match any id in the sourceChannel,
 * an error is thrown.
 */

def safeJoin(targetChannel, sourceChannel, key) {
  def sourceIDs = new IDChecker()

  // record every id that flows through the source channel
  def sourceCheck = sourceChannel
    | map { tup ->
      sourceIDs.observe(tup[0])
      tup
    }
  // NOTE(review): targetCheck is never consumed below; presumably the map
  // operator still executes for its validating side effect - confirm.
  def targetCheck = targetChannel
    | map { tup ->
      def id = tup[0]

      if (!sourceIDs.contains(id)) {
        error (
          "Error in module '${key}' when merging output with original state.\n" +
          " Reason: output with id '${id}' could not be joined with source channel.\n" +
          " If the IDs in the output channel differ from the input channel,\n" +
          " please set `tup[1]._meta.join_id to the original ID.\n" +
          " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" +
          " Unexpected ID in the output channel: '${id}'.\n" +
          " Example input event: [\"id\", [input: file(...)]],\n" +
          " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]"
        )
      }
      // TODO: add link to our documentation on how to fix this

      tup
    }

  // pair events by id; append the extra source elements to the target event
  sourceCheck.cross(targetChannel)
    | map{ left, right ->
      right + left.drop(1)
    }
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf'
/**
 * Fill in the implicit settings of a single argument definition (in place).
 *
 * Sets `multiple`, `required`, `direction`, `multiple_sep` and `plainName`;
 * for file arguments also `must_exist` and `create_parent`; derives a default
 * output path template for output files without a default; and fixes the
 * defaults of boolean_true / boolean_false flags.
 *
 * @param arg The argument map; it is modified and returned.
 * @return The same map.
 */
def _processArgument(arg) {
  arg.multiple = arg.multiple != null ? arg.multiple : false
  arg.required = arg.required != null ? arg.required : false
  arg.direction = arg.direction != null ? arg.direction : "input"
  arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";"
  // strip the leading dashes: '--foo' -> 'foo'
  arg.plainName = arg.name.replaceAll("^-*", "")

  if (arg.type == "file") {
    arg.must_exist = arg.must_exist != null ? arg.must_exist : true
    arg.create_parent = arg.create_parent != null ? arg.create_parent : true
  }

  // add default values to output files which haven't already got a default
  if (arg.type == "file" && arg.direction == "output" && arg.default == null) {
    def mult = arg.multiple ? "_*" : ""
    // The default is known to be null here, so only the example can supply
    // a file extension for the generated name.
    def extSearch = arg.example != null ? arg.example : ""
    if (extSearch instanceof List) {
      // an empty example list simply yields no extension
      extSearch = extSearch.isEmpty() ? "" : extSearch[0]
    }
    def extSearchResult = extSearch?.toString()?.find("\\.[^\\.]+\$")
    def ext = extSearchResult != null ? extSearchResult : ""
    // '$id' and '$key' are kept literal (escaped) in the generated template
    arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}"
    if (arg.multiple) {
      arg.default = [arg.default]
    }
  }

  // single-valued arguments: unwrap list-valued default / example
  if (!arg.multiple) {
    if (arg.default != null && arg.default instanceof List) {
      arg.default = arg.default[0]
    }
    if (arg.example != null && arg.example instanceof List) {
      arg.example = arg.example[0]
    }
  }

  if (arg.type == "boolean_true") {
    arg.default = false
  }
  if (arg.type == "boolean_false") {
    arg.default = true
  }

  arg
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf'
/**
 * Merge the Nextflow-level arguments (--publish_dir, --param_list) into a
 * config and run the result through processConfig().
 *
 * @param config The Viash config map.
 * @return The processed, merged config.
 */
def addGlobalArguments(config) {
  def localConfig = [
    "argument_groups": [
      [
        "name": "Nextflow input-output arguments",
        "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
        "arguments" : [
          [
            'name': '--publish_dir',
            'required': true,
            'type': 'string',
            'description': 'Path to an output directory.',
            'example': 'output/',
            'multiple': false
          ],
          [
            'name': '--param_list',
            'required': false,
            'type': 'string',
            'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.
              |
              |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`.
              |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.
              |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`.
              |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`.
              |
              |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
            'example': 'my_params.yaml',
            'multiple': false,
            'hidden': true
          ]
          // TODO: allow multiple: true in param_list?
          // TODO: allow to specify a --param_list_regex to filter the param_list?
          // TODO: allow to specify a --param_list_from_state to remap entries in the param_list?
        ]
      ]
    ]
  ]

  return processConfig(_mergeMap(config, localConfig))
}

/**
 * Merge `rhs` into a shallow copy of `lhs`: nested maps are merged
 * recursively, collections are concatenated, anything else is overwritten.
 */
def _mergeMap(Map lhs, Map rhs) {
  return rhs.inject(lhs.clone()) { map, entry ->
    if (map[entry.key] instanceof Map && entry.value instanceof Map) {
      map[entry.key] = _mergeMap(map[entry.key], entry.value)
    } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) {
      map[entry.key] += entry.value
    } else {
      map[entry.key] = entry.value
    }
    return map
  }
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf'
/**
 * Render the help text of one processed argument: the `--name` line, its
 * properties (type, default, example, choices, min, max) and the wrapped
 * description.
 */
def _generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  // flags that are appended to the type, only when they hold
  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    ["output", param.direction.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  def dflt = null
  if (param.default != null) {
    if (param.default instanceof List) {
      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      dflt = param.default.toString()
    }
  }
  def example = null
  if (param.example != null) {
    if (param.example instanceof List) {
      example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      example = param.example.toString()
    }
  }
  def min = param.min?.toString()
  def max = param.max?.toString()

  // quote a choice when it contains a comma or needed escaping
  def escapeChoice = { choice ->
    def s1 = choice.replaceAll("\\n", "\\\\n")
    def s2 = s1.replaceAll("\"", """\\\"""")
    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
  }
  def choices = param.choices == null ?
    null :
    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"

  def namedPropsStr = [
    ["type", ([param.type] + unnamedProps).join(", ")],
    ["default", dflt],
    ["example", example],
    ["choices", choices],
    ["min", min],
    ["max", max]
  ]
    .findAll{it[1]}
    // Literal replace(): show an embedded newline as the two characters '\n'.
    // replaceAll() treats the backslash in the replacement as an escape and
    // would emit a bare 'n' instead.
    .collect{"\n " + it[0] + ": " + it[1].replace("\n", "\\n")}
    .join("")

  def descStr = param.description == null ?
    "" :
    _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")

  "\n --" + param.plainName +
    namedPropsStr +
    descStr
}

// Based on Helper.generateHelp() in Helper.scala
/**
 * Render the complete help text of a processed config: name and version,
 * description, usage, and one section per argument group.
 */
def _generateHelp(config) {
  def fun = config

  // PART 1: NAME AND VERSION
  def nameStr = fun.name +
    (fun.version == null ? "" : " " + fun.version)

  // PART 2: DESCRIPTION
  def descrStr = fun.description == null ?
    "" :
    "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n")

  // PART 3: Usage
  def usageStr = fun.usage == null ?
    "" :
    "\n\nUsage:\n" + fun.usage.trim()

  // PART 4: Options
  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
    def name = argGroup.name
    def descriptionStr = argGroup.description == null ?
      "" :
      "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
    // a group may list arguments by plain name; resolve those, drop unknown ones
    def arguments = argGroup.arguments.collect{arg ->
      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
    }.findAll{it != null}
    def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)}

    "\n\n$name:" +
      descriptionStr +
      argumentStrs.join("\n")
  }

  // FINAL: combine
  def out = nameStr +
    descrStr +
    usageStr +
    argGroupStrs.join("")

  return out
}

// based on Format._paragraphWrap
/**
 * Greedy word wrap. Every "\n"-separated paragraph of `str` is split on
 * single whitespace characters and re-joined with single spaces into lines
 * of at most `maxLength` characters (a longer single word gets its own line).
 *
 * @param str The text to wrap.
 * @param maxLength Maximum line length.
 * @return The list of wrapped lines.
 */
def _paragraphWrap(str, maxLength) {
  def outLines = []
  str.split("\n").each{par ->
    def words = par.split("\\s").toList()

    // A whitespace-only paragraph splits into no tokens at all; emit an empty
    // line for it instead of failing on an empty list.
    def line = words.isEmpty() ? "" : words.head()
    words.drop(1).each{word ->
      if (line.length() + word.length() + 1 <= maxLength) {
        line = line + " " + word
      } else {
        outLines.add(line)
        line = word
      }
    }
    outLines.add(line)
  }
  return outLines
}

/** Print the generated help text and exit with status 0 when `params.help` is set. */
def helpMessage(config) {
  if (params.containsKey("help") && params.help) {
    def mergedConfig = addGlobalArguments(config)
    def helpStr = _generateHelp(mergedConfig)
    println(helpStr)
    exit 0
  }
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf'
/**
 * Normalise a config map in place: process every argument, then derive
 * `allArguments` and `allArgumentGroups`.
 *
 * @param config The config map; it is modified and returned.
 */
def processConfig(config) {
  // set defaults for arguments
  config.arguments =
    (config.arguments ?: []).collect{_processArgument(it)}

  // set defaults for argument_group arguments
  config.argument_groups =
    (config.argument_groups ?: []).collect{grp ->
      grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)}
      grp
    }

  // create combined arguments list
  config.allArguments =
    config.arguments +
    config.argument_groups.collectMany{it.arguments}

  // add missing argument groups (based on Functionality::allArgumentGroups())
  def argGroups = config.argument_groups
  if (argGroups.any{it.name.toLowerCase() == "arguments"}) {
    // an "arguments" group already exists: append the ungrouped arguments to it
    argGroups = argGroups.collect{ grp ->
      if (grp.name.toLowerCase() == "arguments") {
        grp = grp + [
          arguments: grp.arguments + config.arguments
        ]
      }
      grp
    }
  } else {
    argGroups = argGroups + [
      name: "Arguments",
      arguments: config.arguments
    ]
  }
  config.allArgumentGroups = argGroups

  config
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf'

/** Read a Viash config (default: config.vsh.yaml next to the module) and process it. */
def readConfig(file) {
  def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml"))
  processConfig(config)
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf'
/**
 * Resolve a path relative to the current file.
 *
 * @param str The path to resolve, as a String.
 * @param parentPath The path to resolve relative to, as a Path.
 *
 * @return The path that may have been resolved, as a Path.
 *   Values that are not a String are returned unchanged.
 */
def _resolveSiblingIfNotAbsolute(str, parentPath) {
  if (str !instanceof String) {
    return str
  }
  if (!_stringIsAbsolutePath(str)) {
    return parentPath.resolveSibling(str)
  } else {
    return file(str, hidden: true)
  }
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf'
/**
 * Check whether a path as a string is absolute.
 *
 * In the past, we tried using `file(., relative: true).isAbsolute()`,
 * but the 'relative' option was added in 22.10.0.
 *
 * @param path The path to check, as a String.
 *
 * @return Whether the path is absolute, as a boolean.
+ */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? 
params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + 
options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + 
required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + 
return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? 
obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? 
+ publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + // the heredoc delimiter is quoted so the shell writes the YAML blob verbatim + // (an unquoted delimiter makes bash expand '$' and backticks inside the blob) + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << 'HERE' +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? 
params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : 
"Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. 
Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript 
instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +// Assert that 'map' is a Map, that each of its keys occurs in 'expectedKeys', and that it +// contains every key listed in 'requiredKeys'. 'mapName' is only used in the error messages. +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + // report the missing key itself; 'key' is not defined inside this closure + assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source 
/cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? 
":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE 
maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? 
+ } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? 
params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." 
+ } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +// Normalises the 'fromState' workflow argument. +// - null is returned as null, and a Closure is returned unchanged. +// - a List of strings is first turned into an identity Map (name -> name). +// - a Map [newkey: origkey] is turned into a closure that takes a tuple, reads the state Map from element 1 and returns a Map holding state[origkey] under newkey for every origkey present in the state. +// key_ is only used in error messages; config_ supplies allArguments. +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // use the config_ parameter (as _processToState does) rather than the script-global 'meta' + // NOTE(review): direction is compared against capitalised "Input" while other code in this file compares against lowercase "input"; if the processed config stores lowercase values this list is always empty and missing keys are silently skipped -- confirm intent before changing + def requiredInputNames = config_.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // copy the value when the state has origkey; only fail when origkey is listed as required + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new
Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +// Normalises the 'toState' workflow argument into a closure. +// - null becomes a closure returning element 1 of the tuple it is given. +// - a List of strings is first turned into an identity Map (name -> name). +// - a Map [newkey: origkey] becomes a closure that reads the output Map from element 1 and the state Map from element 2 and returns the state plus output[origkey] stored under newkey. +// - a Closure is returned unchanged. +// key_ is only used in error messages; config_ supplies allArguments. +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // NOTE(review): capitalised "Output" here vs lowercase "output" used elsewhere in this file -- this list may always be empty; confirm intent before changing + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // copy the value when the output has origkey; only fail when origkey is listed as required + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in
module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +// Evaluates to a 'view' call that prints each tuple (tagged with the module key and debugKey) when workflowArgs.debug is truthy, and to an identity 'map' otherwise; used as a stage in the '|' pipelines of workflowFactory. +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. 
Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. 
Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + // the module key is held in key_ inside this factory; the message must not reference an undefined 'key' + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + // drop the old keys once every value has been copied to its new key + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? 
+ chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] 
+ // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "copy_if_exists", + "version" : "main", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--required_file", + "example" : [ + "/tmp/rnaseq_workflow_config/required_file.txt" + ], + "must_exist" : false, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--optional_file", + "example" : [ + "/tmp/rnaseq_workflow_config/optional_file.txt" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Ouput", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "default" : [ + "copy_if_exists_output" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + 
"multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/assets/required_file.txt" + }, + { + "type" : "file", + "path" : "/src/assets/optional_file.txt" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "main" + }, + { + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 
8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "main", + "namespace_separator" : "/" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/extra/copy_if_exists/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "/workdir/root/repo/target/nextflow/copy_if_exists", + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" + }, + "package_config" : { + "name" : "rnaseq", + "version" : "main", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/rnaseq/v1", + "dest" : "testData" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "biobox", + "repo" : "biobox", + "tag" : "main" + }, + 
{ + "type" : "vsh", + "name" : "craftbox", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "viash_version" : "0.9.2", + "source" : "/workdir/root/repo/src", + "target" : "/workdir/root/repo/target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'main'" + ], + "organization" : "vsh" + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_REQUIRED_FILE+x} ]; then echo "${VIASH_PAR_REQUIRED_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_required_file='&'#" ; else echo "# par_required_file="; fi ) +$( if [ ! -z ${VIASH_PAR_OPTIONAL_FILE+x} ]; then echo "${VIASH_PAR_OPTIONAL_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_optional_file='&'#" ; else echo "# par_optional_file="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +mkdir -p \\$par_output + +# This file is checked by the Nextflow module wrapper +cp \\$par_required_file "\\$par_output" + +# If the variable is empty, we use the default one (registered as a resource) +if [ -z \\$par_optional_file ]; then + echo "No optional_file provided, using the default" + cp \\$meta_resources_dir/optional_file.txt "\\$par_output" +else + echo "Optional file provided" + if [ -f \\$par_optional_file ]; then + cp \\$par_optional_file "\\$par_output" + else + # Unreachable: the Viash-generated module checks this + echo "Optional file does not exist" + exit 1 + fi +fi + +echo "Done" +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. 
+ */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + 
.collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + // parenthesised on purpose: '!out instanceof List' parses as '(!out) instanceof List', which is always false + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." 
+ } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? 
+ if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = new nextflow.script.ScriptParser(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new 
nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/rnaseq/copy_if_exists", + "tag" : "main" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
+ // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/copy_if_exists/nextflow.config b/target/nextflow/copy_if_exists/nextflow.config new file mode 100644 index 0000000..a375ce3 --- /dev/null +++ b/target/nextflow/copy_if_exists/nextflow.config @@ -0,0 +1,124 @@ +manifest { + name = 'copy_if_exists' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'main' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') 
?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { 
memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/copy_if_exists/nextflow_labels.config b/target/nextflow/copy_if_exists/nextflow_labels.config new file mode 100644 index 0000000..b18e545 --- /dev/null +++ b/target/nextflow/copy_if_exists/nextflow_labels.config @@ -0,0 +1,44 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 
'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // Resource labels + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 2 } + withLabel: midcpu { cpus = 4 } + withLabel: highcpu { cpus = 8 } + withLabel: veryhighcpu { cpus = 16 } + + withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 24.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 48.GB * task.attempt ) } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) { + return process.maxMemory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} + diff --git a/target/nextflow/copy_if_exists/nextflow_schema.json b/target/nextflow/copy_if_exists/nextflow_schema.json new file mode 100644 index 0000000..ce16f25 --- /dev/null +++ b/target/nextflow/copy_if_exists/nextflow_schema.json @@ -0,0 +1,105 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "copy_if_exists", +"description": "No description", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "required_file": { + "type": + "string", + "description": "Type: `file`, required, example: `/tmp/rnaseq_workflow_config/required_file.txt`. ", + "help_text": "Type: `file`, required, example: `/tmp/rnaseq_workflow_config/required_file.txt`. 
" + + } + + + , + "optional_file": { + "type": + "string", + "description": "Type: `file`, example: `/tmp/rnaseq_workflow_config/optional_file.txt`. ", + "help_text": "Type: `file`, example: `/tmp/rnaseq_workflow_config/optional_file.txt`. " + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, default: `copy_if_exists_output`. ", + "help_text": "Type: `file`, default: `copy_if_exists_output`. " + , + "default":"copy_if_exists_output" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. 
Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/copy_if_exists/optional_file.txt b/target/nextflow/copy_if_exists/optional_file.txt new file mode 100644 index 0000000..6462a73 --- /dev/null +++ b/target/nextflow/copy_if_exists/optional_file.txt @@ -0,0 +1 @@ +Optional! diff --git a/target/nextflow/copy_if_exists/required_file.txt b/target/nextflow/copy_if_exists/required_file.txt new file mode 100644 index 0000000..b4dbbb5 --- /dev/null +++ b/target/nextflow/copy_if_exists/required_file.txt @@ -0,0 +1 @@ +Required! 
diff --git a/target/nextflow/deseq2_qc/.config.vsh.yaml b/target/nextflow/deseq2_qc/.config.vsh.yaml index 24d3334..fe9692f 100644 --- a/target/nextflow/deseq2_qc/.config.vsh.yaml +++ b/target/nextflow/deseq2_qc/.config.vsh.yaml @@ -133,6 +133,9 @@ info: - "modules/local/deseq2_qc.nf" last_sha: "92b2a7857de1dda9d1c19a088941fc81e2976ff7" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -215,11 +218,20 @@ runners: engines: - type: "docker" id: "docker" - image: "rocker/r2u:22.04" + image: "debian:latest" target_registry: "images.viash-hub.com" target_tag: "main" namespace_separator: "/" setup: + - type: "apt" + packages: + - "libcurl4-openssl-dev" + - "r-base" + - "r-base-core" + - "libxml2-dev" + - "procps" + - "libssl-dev" + interactive: false - type: "r" cran: - "optparse" @@ -231,6 +243,7 @@ engines: bioc: - "DESeq2" bioc_force_install: false + warnings_as_errors: true entrypoint: [] cmd: null - type: "native" @@ -241,15 +254,15 @@ build_info: engine: "docker|native" output: "target/nextflow/deseq2_qc" executable: "target/nextflow/deseq2_qc/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -260,7 +273,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/deseq2_qc/main.nf b/target/nextflow/deseq2_qc/main.nf index e096b11..c953dfc 100644 --- a/target/nextflow/deseq2_qc/main.nf +++ 
b/target/nextflow/deseq2_qc/main.nf @@ -1,6 +1,6 @@ // deseq2_qc main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && 
arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << 'HERE' +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2979,6 +3210,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3079,11 +3314,23 @@ meta = [ { "type" : "docker", "id" : "docker", - "image" : "rocker/r2u:22.04", + "image" : "debian:latest", "target_registry" : "images.viash-hub.com", "target_tag" : "main", "namespace_separator" : "/", "setup" : [ + { + "type" : "apt", + "packages" : [ + "libcurl4-openssl-dev", + "r-base", + "r-base-core", + "libxml2-dev", + "procps", + "libssl-dev" + ], + "interactive" : false + }, { "type" : "r", "cran" : [ @@ -3097,7 +3344,8 @@ meta = [ "bioc" : [ "DESeq2" ], - "bioc_force_install" : false + "bioc_force_install" : false, + "warnings_as_errors" : true } ] }, @@ -3111,9 +3359,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/deseq2_qc", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3121,7 +3369,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3140,7 
+3388,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ @@ -3296,7 +3544,7 @@ saveRDS(dds, file = sub("\\\\\\\\.dds\\\\\\\\.RData\\$", ".rds", DDSFile)) ##' @author Gavin Kelly plotPCA_vst <- function(object, ntop = 500, assay = length(assays(object))) { - rv <- rowVars(assay(object, assay)) + rv <- rowVars(assay(object, assay), useNames = TRUE) select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))] pca <- prcomp(t(assay(object, assay)[select, ]), center = TRUE, scale = FALSE) percentVar <- pca\\$sdev^2 / sum(pca\\$sdev^2) diff --git a/target/nextflow/deseq2_qc/nextflow_schema.json b/target/nextflow/deseq2_qc/nextflow_schema.json index 828a82f..ab77840 100644 --- a/target/nextflow/deseq2_qc/nextflow_schema.json +++ b/target/nextflow/deseq2_qc/nextflow_schema.json @@ -102,10 +102,10 @@ "outdir": { "type": "string", - "description": "Type: `file`, default: `$id.$key.outdir.outdir`. ", - "help_text": "Type: `file`, default: `$id.$key.outdir.outdir`. " + "description": "Type: `file`, default: `deseq2`. ", + "help_text": "Type: `file`, default: `deseq2`. " , - "default":"$id.$key.outdir.outdir" + "default":"deseq2" } @@ -113,10 +113,10 @@ "pca_multiqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pca_multiqc.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.pca_multiqc.tsv`. " + "description": "Type: `file`, default: `deseq2.pca.vals_mqc.tsv`. ", + "help_text": "Type: `file`, default: `deseq2.pca.vals_mqc.tsv`. " , - "default":"$id.$key.pca_multiqc.tsv" + "default":"deseq2.pca.vals_mqc.tsv" } @@ -124,10 +124,10 @@ "sample_dists_multiqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.sample_dists_multiqc.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.sample_dists_multiqc.tsv`. " + "description": "Type: `file`, default: `deseq2.sample.dists_mqc.tsv`. 
", + "help_text": "Type: `file`, default: `deseq2.sample.dists_mqc.tsv`. " , - "default":"$id.$key.sample_dists_multiqc.tsv" + "default":"deseq2.sample.dists_mqc.tsv" } diff --git a/target/nextflow/dupradar/.config.vsh.yaml b/target/nextflow/dupradar/.config.vsh.yaml index 1922fbc..2b961b8 100644 --- a/target/nextflow/dupradar/.config.vsh.yaml +++ b/target/nextflow/dupradar/.config.vsh.yaml @@ -165,6 +165,9 @@ info: - "modules/local/dupradar.nf" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -260,6 +263,7 @@ engines: bioc: - "dupRadar" bioc_force_install: false + warnings_as_errors: true entrypoint: [] cmd: null - type: "native" @@ -270,15 +274,15 @@ build_info: engine: "docker|native" output: "target/nextflow/dupradar" executable: "target/nextflow/dupradar/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -289,7 +293,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/dupradar/main.nf b/target/nextflow/dupradar/main.nf index 403868e..ca10e7e 100644 --- a/target/nextflow/dupradar/main.nf +++ b/target/nextflow/dupradar/main.nf @@ -1,6 +1,6 @@ // dupradar main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3012,6 +3243,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3129,7 +3364,8 @@ meta = [ "bioc" : [ "dupRadar" ], - "bioc_force_install" : false + "bioc_force_install" : false, + "warnings_as_errors" : true } ] }, @@ -3143,9 +3379,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/dupradar", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3153,7 +3389,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3172,7 +3408,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/dupradar/nextflow_schema.json b/target/nextflow/dupradar/nextflow_schema.json index d5c3ad3..afa1331 100644 --- a/target/nextflow/dupradar/nextflow_schema.json +++ b/target/nextflow/dupradar/nextflow_schema.json 
@@ -79,10 +79,10 @@ "output_dupmatrix": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_dupmatrix.txt`. path to output file (txt) of duplicate tag counts", - "help_text": "Type: `file`, default: `$id.$key.output_dupmatrix.txt`. path to output file (txt) of duplicate tag counts" + "description": "Type: `file`, default: `$id.dup_matrix.txt`. path to output file (txt) of duplicate tag counts", + "help_text": "Type: `file`, default: `$id.dup_matrix.txt`. path to output file (txt) of duplicate tag counts" , - "default":"$id.$key.output_dupmatrix.txt" + "default":"$id.dup_matrix.txt" } @@ -90,10 +90,10 @@ "output_dup_intercept_mqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar", - "help_text": "Type: `file`, default: `$id.$key.output_dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar" + "description": "Type: `file`, default: `$id.dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar", + "help_text": "Type: `file`, default: `$id.dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar" , - "default":"$id.$key.output_dup_intercept_mqc.txt" + "default":"$id.dup_intercept_mqc.txt" } @@ -101,10 +101,10 @@ "output_duprate_exp_boxplot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot", - "help_text": "Type: `file`, default: `$id.$key.output_duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot" + "description": "Type: `file`, default: `$id.duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot", + "help_text": "Type: `file`, default: `$id.duprate_exp_boxplot.pdf`. 
path to output file (pdf) of distribution of expression box plot" , - "default":"$id.$key.output_duprate_exp_boxplot.pdf" + "default":"$id.duprate_exp_boxplot.pdf" } @@ -112,10 +112,10 @@ "output_duprate_exp_densplot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_duprate_exp_densplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts", - "help_text": "Type: `file`, default: `$id.$key.output_duprate_exp_densplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts" + "description": "Type: `file`, default: `$id.duprate_exp_densityplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts", + "help_text": "Type: `file`, default: `$id.duprate_exp_densityplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts" , - "default":"$id.$key.output_duprate_exp_densplot.pdf" + "default":"$id.duprate_exp_densityplot.pdf" } @@ -123,10 +123,10 @@ "output_duprate_exp_denscurve_mqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_duprate_exp_denscurve_mqc.txt`. path to output file (pdf) of density curve of gene duplication multiqc", - "help_text": "Type: `file`, default: `$id.$key.output_duprate_exp_denscurve_mqc.txt`. path to output file (pdf) of density curve of gene duplication multiqc" + "description": "Type: `file`, default: `$id.duprate_exp_density_curve_mqc.txt`. path to output file (pdf) of density curve of gene duplication multiqc", + "help_text": "Type: `file`, default: `$id.duprate_exp_density_curve_mqc.txt`. path to output file (pdf) of density curve of gene duplication multiqc" , - "default":"$id.$key.output_duprate_exp_denscurve_mqc.txt" + "default":"$id.duprate_exp_density_curve_mqc.txt" } @@ -134,10 +134,10 @@ "output_expression_histogram": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_expression_histogram.pdf`. 
path to output file (pdf) of distribution of RPK values per gene histogram", - "help_text": "Type: `file`, default: `$id.$key.output_expression_histogram.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram" + "description": "Type: `file`, default: `$id.expression_hist.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram", + "help_text": "Type: `file`, default: `$id.expression_hist.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram" , - "default":"$id.$key.output_expression_histogram.pdf" + "default":"$id.expression_hist.pdf" } @@ -145,10 +145,10 @@ "output_intercept_slope": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_intercept_slope.txt`. output file (txt) with progression of duplication rate value", - "help_text": "Type: `file`, default: `$id.$key.output_intercept_slope.txt`. output file (txt) with progression of duplication rate value" + "description": "Type: `file`, default: `$id.intercept_slope.txt`. output file (txt) with progression of duplication rate value", + "help_text": "Type: `file`, default: `$id.intercept_slope.txt`. 
output file (txt) with progression of duplication rate value" , - "default":"$id.$key.output_intercept_slope.txt" + "default":"$id.intercept_slope.txt" } diff --git a/target/nextflow/getchromsizes/.config.vsh.yaml b/target/nextflow/getchromsizes/.config.vsh.yaml index a03c7e0..6286182 100644 --- a/target/nextflow/getchromsizes/.config.vsh.yaml +++ b/target/nextflow/getchromsizes/.config.vsh.yaml @@ -67,6 +67,9 @@ info: - "modules/nf-core/custom/getchromsizes/meta.yml" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -171,15 +174,15 @@ build_info: engine: "docker|native" output: "target/nextflow/getchromsizes" executable: "target/nextflow/getchromsizes/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -190,7 +193,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/getchromsizes/main.nf b/target/nextflow/getchromsizes/main.nf index 45c6af1..a72dbff 100644 --- a/target/nextflow/getchromsizes/main.nf +++ b/target/nextflow/getchromsizes/main.nf @@ -1,6 +1,6 @@ // getchromsizes main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. 
// @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def 
items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2897,6 +3128,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3020,9 +3255,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/getchromsizes", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3030,7 +3265,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3049,7 +3284,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/getchromsizes/nextflow_schema.json b/target/nextflow/getchromsizes/nextflow_schema.json index f6a05b2..c59b3c1 100644 --- a/target/nextflow/getchromsizes/nextflow_schema.json +++ b/target/nextflow/getchromsizes/nextflow_schema.json @@ -37,10 +37,10 @@ "sizes": { "type": "string", - "description": "Type: `file`, default: `$id.$key.sizes.sizes`. 
File containing chromosome lengths", - "help_text": "Type: `file`, default: `$id.$key.sizes.sizes`. File containing chromosome lengths" + "description": "Type: `file`, default: `$id.$key.sizes`. File containing chromosome lengths", + "help_text": "Type: `file`, default: `$id.$key.sizes`. File containing chromosome lengths" , - "default":"$id.$key.sizes.sizes" + "default":"$id.$key.sizes" } @@ -48,10 +48,10 @@ "fai": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fai.fai`. FASTA index file", - "help_text": "Type: `file`, default: `$id.$key.fai.fai`. FASTA index file" + "description": "Type: `file`, default: `$id.$key.fai`. FASTA index file", + "help_text": "Type: `file`, default: `$id.$key.fai`. FASTA index file" , - "default":"$id.$key.fai.fai" + "default":"$id.$key.fai" } @@ -59,10 +59,10 @@ "gzi": { "type": "string", - "description": "Type: `file`, default: `$id.$key.gzi.gzi`. Optional gzip index file for compressed inputs", - "help_text": "Type: `file`, default: `$id.$key.gzi.gzi`. Optional gzip index file for compressed inputs" + "description": "Type: `file`, default: `$id.$key.gzi`. Optional gzip index file for compressed inputs", + "help_text": "Type: `file`, default: `$id.$key.gzi`. 
Optional gzip index file for compressed inputs" , - "default":"$id.$key.gzi.gzi" + "default":"$id.$key.gzi" } diff --git a/target/nextflow/gtf2bed/.config.vsh.yaml b/target/nextflow/gtf2bed/.config.vsh.yaml index 10eb49f..4213376 100644 --- a/target/nextflow/gtf2bed/.config.vsh.yaml +++ b/target/nextflow/gtf2bed/.config.vsh.yaml @@ -48,6 +48,9 @@ info: - "modules/local/gtf2bed.nf" last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -149,15 +152,15 @@ build_info: engine: "docker|native" output: "target/nextflow/gtf2bed" executable: "target/nextflow/gtf2bed/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -168,7 +171,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/gtf2bed/main.nf b/target/nextflow/gtf2bed/main.nf index 74607b1..28b2e78 100644 --- a/target/nextflow/gtf2bed/main.nf +++ b/target/nextflow/gtf2bed/main.nf @@ -1,6 +1,6 @@ // gtf2bed main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. 
// @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def 
items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2878,6 +3109,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3002,9 +3237,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/gtf2bed", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3012,7 +3247,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3031,7 +3266,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/gtf2bed/nextflow_schema.json b/target/nextflow/gtf2bed/nextflow_schema.json index ae24eac..d459f39 100644 --- a/target/nextflow/gtf2bed/nextflow_schema.json +++ b/target/nextflow/gtf2bed/nextflow_schema.json @@ -37,10 +37,10 @@ "bed_output": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.bed_output.bed_output`. 
BED file resulting from the conversion of the GTF input file", - "help_text": "Type: `file`, required, default: `$id.$key.bed_output.bed_output`. BED file resulting from the conversion of the GTF input file." + "description": "Type: `file`, required, default: `$id.$key.bed_output`. BED file resulting from the conversion of the GTF input file", + "help_text": "Type: `file`, required, default: `$id.$key.bed_output`. BED file resulting from the conversion of the GTF input file." , - "default":"$id.$key.bed_output.bed_output" + "default":"$id.$key.bed_output" } diff --git a/target/nextflow/gtf_filter/.config.vsh.yaml b/target/nextflow/gtf_filter/.config.vsh.yaml index 3869914..f6a16f9 100644 --- a/target/nextflow/gtf_filter/.config.vsh.yaml +++ b/target/nextflow/gtf_filter/.config.vsh.yaml @@ -63,6 +63,9 @@ info: - "modules/local/gtf_filter.nf" last_sha: "1c6012ecbb087014ea4b8f0f3d39b874850277a8" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -159,15 +162,15 @@ build_info: engine: "docker|native" output: "target/nextflow/gtf_filter" executable: "target/nextflow/gtf_filter/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -178,7 +181,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/gtf_filter/main.nf b/target/nextflow/gtf_filter/main.nf index 360a3fe..6c34602 100644 --- 
a/target/nextflow/gtf_filter/main.nf +++ b/target/nextflow/gtf_filter/main.nf @@ -1,6 +1,6 @@ // gtf_filter main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { 
arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2895,6 +3126,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3010,9 +3245,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/gtf_filter", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3020,7 +3255,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3039,7 +3274,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/gtf_filter/nextflow_schema.json b/target/nextflow/gtf_filter/nextflow_schema.json index ded4692..89fc1ac 100644 --- a/target/nextflow/gtf_filter/nextflow_schema.json +++ b/target/nextflow/gtf_filter/nextflow_schema.json @@ -58,10 +58,10 @@ "filtered_gtf": { "type": "string", - "description": "Type: `file`, default: `$id.$key.filtered_gtf.filtered_gtf`. 
Filtered GTF file containing only sequences in the FASTA file", - "help_text": "Type: `file`, default: `$id.$key.filtered_gtf.filtered_gtf`. Filtered GTF file containing only sequences in the FASTA file" + "description": "Type: `file`, default: `$id.$key.filtered_gtf`. Filtered GTF file containing only sequences in the FASTA file", + "help_text": "Type: `file`, default: `$id.$key.filtered_gtf`. Filtered GTF file containing only sequences in the FASTA file" , - "default":"$id.$key.filtered_gtf.filtered_gtf" + "default":"$id.$key.filtered_gtf" } diff --git a/target/nextflow/gunzip/.config.vsh.yaml b/target/nextflow/gunzip/.config.vsh.yaml index 2c83ab2..78619ec 100644 --- a/target/nextflow/gunzip/.config.vsh.yaml +++ b/target/nextflow/gunzip/.config.vsh.yaml @@ -47,6 +47,9 @@ info: - "modules/nf-core/gunzip/meta.yml" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -148,15 +151,15 @@ build_info: engine: "docker|native" output: "target/nextflow/gunzip" executable: "target/nextflow/gunzip/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -167,7 +170,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/gunzip/main.nf b/target/nextflow/gunzip/main.nf index 517e3e8..0f82e85 100644 --- a/target/nextflow/gunzip/main.nf +++ 
b/target/nextflow/gunzip/main.nf @@ -1,6 +1,6 @@ // gunzip main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && 
arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2875,6 +3106,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -2999,9 +3234,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/gunzip", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3009,7 +3244,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3028,7 +3263,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/gunzip/nextflow_schema.json b/target/nextflow/gunzip/nextflow_schema.json index 19860ae..798f4f3 100644 --- a/target/nextflow/gunzip/nextflow_schema.json +++ b/target/nextflow/gunzip/nextflow_schema.json @@ -37,10 +37,10 @@ "output": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. 
Decompressed file", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Decompressed file." + "description": "Type: `file`, required, default: `$id.$key.output`. Decompressed file", + "help_text": "Type: `file`, required, default: `$id.$key.output`. Decompressed file." , - "default":"$id.$key.output.output" + "default":"$id.$key.output" } diff --git a/target/nextflow/multiqc_custom_biotype/.config.vsh.yaml b/target/nextflow/multiqc_custom_biotype/.config.vsh.yaml index 8fb581f..f7aff9c 100644 --- a/target/nextflow/multiqc_custom_biotype/.config.vsh.yaml +++ b/target/nextflow/multiqc_custom_biotype/.config.vsh.yaml @@ -73,6 +73,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -169,15 +172,15 @@ build_info: engine: "docker|native" output: "target/nextflow/multiqc_custom_biotype" executable: "target/nextflow/multiqc_custom_biotype/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -188,7 +191,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/multiqc_custom_biotype/main.nf b/target/nextflow/multiqc_custom_biotype/main.nf index ed603dc..91941a3 100644 --- a/target/nextflow/multiqc_custom_biotype/main.nf +++ b/target/nextflow/multiqc_custom_biotype/main.nf @@ -1,6 +1,6 @@ // multiqc_custom_biotype main // -// This wrapper 
script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) 
!= null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2904,6 +3135,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3019,9 +3254,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/multiqc_custom_biotype", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3029,7 +3264,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3048,7 +3283,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/multiqc_custom_biotype/nextflow_schema.json b/target/nextflow/multiqc_custom_biotype/nextflow_schema.json index 73e6582..8645a76 100644 --- a/target/nextflow/multiqc_custom_biotype/nextflow_schema.json +++ b/target/nextflow/multiqc_custom_biotype/nextflow_schema.json @@ -59,10 +59,10 @@ "featurecounts_multiqc": { "type": "string", - "description": 
"Type: `file`, default: `$id.$key.featurecounts_multiqc.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.featurecounts_multiqc.tsv`. " + "description": "Type: `file`, default: `$id.biotype_counts_mqc.tsv`. ", + "help_text": "Type: `file`, default: `$id.biotype_counts_mqc.tsv`. " , - "default":"$id.$key.featurecounts_multiqc.tsv" + "default":"$id.biotype_counts_mqc.tsv" } @@ -70,10 +70,10 @@ "featurecounts_rrna_multiqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.featurecounts_rrna_multiqc.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.featurecounts_rrna_multiqc.tsv`. " + "description": "Type: `file`, default: `$id.biotype_counts_rrna_mqc.tsv`. ", + "help_text": "Type: `file`, default: `$id.biotype_counts_rrna_mqc.tsv`. " , - "default":"$id.$key.featurecounts_rrna_multiqc.tsv" + "default":"$id.biotype_counts_rrna_mqc.tsv" } diff --git a/target/nextflow/picard_markduplicates/.config.vsh.yaml b/target/nextflow/picard_markduplicates/.config.vsh.yaml index 0f36a43..5f30897 100644 --- a/target/nextflow/picard_markduplicates/.config.vsh.yaml +++ b/target/nextflow/picard_markduplicates/.config.vsh.yaml @@ -107,6 +107,9 @@ info: - "modules/nf-core/picard/markduplicates/meta.yml" last_sha: "55398de6ab7577acfe9b1180016a93d7af7eb859" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -211,15 +214,15 @@ build_info: engine: "docker|native" output: "target/nextflow/picard_markduplicates" executable: "target/nextflow/picard_markduplicates/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: 
"gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -230,7 +233,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/picard_markduplicates/main.nf b/target/nextflow/picard_markduplicates/main.nf index 6f8afbd..63bed72 100644 --- a/target/nextflow/picard_markduplicates/main.nf +++ b/target/nextflow/picard_markduplicates/main.nf @@ -1,6 +1,6 @@ // picard_markduplicates main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = 
config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2944,6 +3175,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3070,9 +3305,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/picard_markduplicates", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3080,7 +3315,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3099,7 +3334,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/picard_markduplicates/nextflow_schema.json b/target/nextflow/picard_markduplicates/nextflow_schema.json index 7195d87..3e64f4b 100644 --- a/target/nextflow/picard_markduplicates/nextflow_schema.json +++ b/target/nextflow/picard_markduplicates/nextflow_schema.json @@ -68,10 +68,10 @@ "output_bam": { "type": "string", - "description": "Type: `file`, 
default: `$id.$key.output_bam.bam`. BAM file with duplicate reads marked/removed", - "help_text": "Type: `file`, default: `$id.$key.output_bam.bam`. BAM file with duplicate reads marked/removed" + "description": "Type: `file`, default: `$id.MarkDuplicates.bam`. BAM file with duplicate reads marked/removed", + "help_text": "Type: `file`, default: `$id.MarkDuplicates.bam`. BAM file with duplicate reads marked/removed" , - "default":"$id.$key.output_bam.bam" + "default":"$id.MarkDuplicates.bam" } @@ -79,10 +79,10 @@ "bai": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bai.bai`. An optional BAM index file", - "help_text": "Type: `file`, default: `$id.$key.bai.bai`. An optional BAM index file. If desired, --CREATE_INDEX must be passed as a flag" + "description": "Type: `file`, default: `$id.MarkDuplicates.bam.bai`. An optional BAM index file", + "help_text": "Type: `file`, default: `$id.MarkDuplicates.bam.bai`. An optional BAM index file. If desired, --CREATE_INDEX must be passed as a flag" , - "default":"$id.$key.bai.bai" + "default":"$id.MarkDuplicates.bam.bai" } @@ -90,10 +90,10 @@ "metrics": { "type": "string", - "description": "Type: `file`, default: `$id.$key.metrics.txt`. Duplicate metrics file generated by picard", - "help_text": "Type: `file`, default: `$id.$key.metrics.txt`. Duplicate metrics file generated by picard" + "description": "Type: `file`, default: `$id.MarkDuplicates.metrics.txt`. Duplicate metrics file generated by picard", + "help_text": "Type: `file`, default: `$id.MarkDuplicates.metrics.txt`. 
Duplicate metrics file generated by picard" , - "default":"$id.$key.metrics.txt" + "default":"$id.MarkDuplicates.metrics.txt" } diff --git a/target/nextflow/prepare_multiqc_input/.config.vsh.yaml b/target/nextflow/prepare_multiqc_input/.config.vsh.yaml index 590f04e..3ef020c 100644 --- a/target/nextflow/prepare_multiqc_input/.config.vsh.yaml +++ b/target/nextflow/prepare_multiqc_input/.config.vsh.yaml @@ -320,6 +320,9 @@ resources: description: "Prepare directory with all the input files for MultiQC.\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -416,15 +419,15 @@ build_info: engine: "docker|native" output: "target/nextflow/prepare_multiqc_input" executable: "target/nextflow/prepare_multiqc_input/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -435,7 +438,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/prepare_multiqc_input/main.nf b/target/nextflow/prepare_multiqc_input/main.nf index aac0401..0cc4655 100644 --- a/target/nextflow/prepare_multiqc_input/main.nf +++ b/target/nextflow/prepare_multiqc_input/main.nf @@ -1,6 +1,6 @@ // prepare_multiqc_input main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3174,6 +3405,10 @@ meta = [ ], "description" : "Prepare directory with all the input files for MultiQC.\n", "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3289,9 +3524,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/prepare_multiqc_input", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3299,7 +3534,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3318,7 +3553,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/prepare_multiqc_input/nextflow_schema.json b/target/nextflow/prepare_multiqc_input/nextflow_schema.json index 6e1e1db..9f56067 100644 --- a/target/nextflow/prepare_multiqc_input/nextflow_schema.json +++ b/target/nextflow/prepare_multiqc_input/nextflow_schema.json @@ -357,10 
+357,10 @@ "output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output.output`. ", - "help_text": "Type: `file`, default: `$id.$key.output.output`. " + "description": "Type: `file`, default: `multiqc_input`. ", + "help_text": "Type: `file`, default: `multiqc_input`. " , - "default":"$id.$key.output.output" + "default":"multiqc_input" } diff --git a/target/nextflow/preprocess_transcripts_fasta/.config.vsh.yaml b/target/nextflow/preprocess_transcripts_fasta/.config.vsh.yaml index 992adc6..8ccf33a 100644 --- a/target/nextflow/preprocess_transcripts_fasta/.config.vsh.yaml +++ b/target/nextflow/preprocess_transcripts_fasta/.config.vsh.yaml @@ -46,6 +46,9 @@ info: - "modules/local/preprocess_transcripts_fasta_gencode.nf" last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -142,15 +145,15 @@ build_info: engine: "docker|native" output: "target/nextflow/preprocess_transcripts_fasta" executable: "target/nextflow/preprocess_transcripts_fasta/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -161,7 +164,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/preprocess_transcripts_fasta/main.nf b/target/nextflow/preprocess_transcripts_fasta/main.nf index 2496d32..12729f8 100644 --- a/target/nextflow/preprocess_transcripts_fasta/main.nf 
+++ b/target/nextflow/preprocess_transcripts_fasta/main.nf @@ -1,6 +1,6 @@ // preprocess_transcripts_fasta main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { 
arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2874,6 +3105,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -2989,9 +3224,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/preprocess_transcripts_fasta", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -2999,7 +3234,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3018,7 +3253,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/preprocess_transcripts_fasta/nextflow_schema.json b/target/nextflow/preprocess_transcripts_fasta/nextflow_schema.json index 5015eae..9ff11b1 100644 --- a/target/nextflow/preprocess_transcripts_fasta/nextflow_schema.json +++ b/target/nextflow/preprocess_transcripts_fasta/nextflow_schema.json @@ -37,10 +37,10 @@ "output": { "type": "string", - 
"description": "Type: `file`, required, default: `$id.$key.output.output`. Path of processed output FASTA file", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Path of processed output FASTA file." + "description": "Type: `file`, required, default: `$id.$key.output`. Path of processed output FASTA file", + "help_text": "Type: `file`, required, default: `$id.$key.output`. Path of processed output FASTA file." , - "default":"$id.$key.output.output" + "default":"$id.$key.output" } diff --git a/target/nextflow/preseq_lcextrap/.config.vsh.yaml b/target/nextflow/preseq_lcextrap/.config.vsh.yaml index 3cadd56..4711f99 100644 --- a/target/nextflow/preseq_lcextrap/.config.vsh.yaml +++ b/target/nextflow/preseq_lcextrap/.config.vsh.yaml @@ -67,6 +67,9 @@ info: - "modules/nf-core/preseq/lcextrap/meta.yml" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -195,15 +198,15 @@ build_info: engine: "docker|native" output: "target/nextflow/preseq_lcextrap" executable: "target/nextflow/preseq_lcextrap/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -214,7 +217,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/preseq_lcextrap/main.nf b/target/nextflow/preseq_lcextrap/main.nf index 09bddec..6ab7c8d 100644 --- 
a/target/nextflow/preseq_lcextrap/main.nf +++ b/target/nextflow/preseq_lcextrap/main.nf @@ -1,6 +1,6 @@ // preseq_lcextrap main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + 
config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2898,6 +3129,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3042,9 +3277,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/preseq_lcextrap", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3052,7 +3287,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3071,7 +3306,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/preseq_lcextrap/nextflow_schema.json b/target/nextflow/preseq_lcextrap/nextflow_schema.json index 776bf27..4947c16 100644 --- a/target/nextflow/preseq_lcextrap/nextflow_schema.json +++ b/target/nextflow/preseq_lcextrap/nextflow_schema.json @@ -57,10 +57,10 @@ "output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output.txt`. 
", - "help_text": "Type: `file`, default: `$id.$key.output.txt`. " + "description": "Type: `file`, default: `$id.lc_extrap.txt`. ", + "help_text": "Type: `file`, default: `$id.lc_extrap.txt`. " , - "default":"$id.$key.output.txt" + "default":"$id.lc_extrap.txt" } diff --git a/target/nextflow/rsem_merge_counts/.config.vsh.yaml b/target/nextflow/rsem_merge_counts/.config.vsh.yaml index 8b1c871..ac7dcad 100644 --- a/target/nextflow/rsem_merge_counts/.config.vsh.yaml +++ b/target/nextflow/rsem_merge_counts/.config.vsh.yaml @@ -89,6 +89,9 @@ info: - "modules/local/rsem_merge_counts/main.nf" last_sha: "311279532694ce7520164ce4d65a388c0cd11f60" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -185,15 +188,15 @@ build_info: engine: "docker|native" output: "target/nextflow/rsem_merge_counts" executable: "target/nextflow/rsem_merge_counts/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -204,7 +207,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/rsem_merge_counts/main.nf b/target/nextflow/rsem_merge_counts/main.nf index 93c63b4..9f08fcf 100644 --- a/target/nextflow/rsem_merge_counts/main.nf +++ b/target/nextflow/rsem_merge_counts/main.nf @@ -1,6 +1,6 @@ // rsem_merge_counts main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is 
auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' 
is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2919,6 +3150,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3034,9 +3269,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/rsem_merge_counts", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3044,7 +3279,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3063,7 +3298,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/rsem_merge_counts/nextflow_schema.json b/target/nextflow/rsem_merge_counts/nextflow_schema.json index 2ec8321..e89573a 100644 --- a/target/nextflow/rsem_merge_counts/nextflow_schema.json +++ b/target/nextflow/rsem_merge_counts/nextflow_schema.json @@ -47,10 +47,10 @@ "merged_gene_counts": { "type": "string", - "description": "Type: `file`, default: 
`$id.$key.merged_gene_counts.tsv`. File containing gene counts across all samples", - "help_text": "Type: `file`, default: `$id.$key.merged_gene_counts.tsv`. File containing gene counts across all samples." + "description": "Type: `file`, default: `rsem.merged.gene_counts.tsv`. File containing gene counts across all samples", + "help_text": "Type: `file`, default: `rsem.merged.gene_counts.tsv`. File containing gene counts across all samples." , - "default":"$id.$key.merged_gene_counts.tsv" + "default":"rsem.merged.gene_counts.tsv" } @@ -58,10 +58,10 @@ "merged_gene_tpm": { "type": "string", - "description": "Type: `file`, default: `$id.$key.merged_gene_tpm.tsv`. File containing gene TPM across all samples", - "help_text": "Type: `file`, default: `$id.$key.merged_gene_tpm.tsv`. File containing gene TPM across all samples." + "description": "Type: `file`, default: `rsem.merged.gene_tpm.tsv`. File containing gene TPM across all samples", + "help_text": "Type: `file`, default: `rsem.merged.gene_tpm.tsv`. File containing gene TPM across all samples." , - "default":"$id.$key.merged_gene_tpm.tsv" + "default":"rsem.merged.gene_tpm.tsv" } @@ -69,10 +69,10 @@ "merged_transcript_counts": { "type": "string", - "description": "Type: `file`, default: `$id.$key.merged_transcript_counts.tsv`. File containing transcript counts across all samples", - "help_text": "Type: `file`, default: `$id.$key.merged_transcript_counts.tsv`. File containing transcript counts across all samples." + "description": "Type: `file`, default: `rsem.merged.transcript_counts.tsv`. File containing transcript counts across all samples", + "help_text": "Type: `file`, default: `rsem.merged.transcript_counts.tsv`. File containing transcript counts across all samples." , - "default":"$id.$key.merged_transcript_counts.tsv" + "default":"rsem.merged.transcript_counts.tsv" } @@ -80,10 +80,10 @@ "merged_transcript_tpm": { "type": "string", - "description": "Type: `file`, default: `$id.$key.merged_transcript_tpm.tsv`. 
File containing transcript TPM across all samples", - "help_text": "Type: `file`, default: `$id.$key.merged_transcript_tpm.tsv`. File containing transcript TPM across all samples." + "description": "Type: `file`, default: `rsem.merged.transcript_tpm.tsv`. File containing transcript TPM across all samples", + "help_text": "Type: `file`, default: `rsem.merged.transcript_tpm.tsv`. File containing transcript TPM across all samples." , - "default":"$id.$key.merged_transcript_tpm.tsv" + "default":"rsem.merged.transcript_tpm.tsv" } diff --git a/target/nextflow/rseqc/rseqc_junctionannotation/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_junctionannotation/.config.vsh.yaml index ff63b15..508ee7f 100644 --- a/target/nextflow/rseqc/rseqc_junctionannotation/.config.vsh.yaml +++ b/target/nextflow/rseqc/rseqc_junctionannotation/.config.vsh.yaml @@ -157,6 +157,9 @@ info: paths: - "modules/nf-core/rseqc/junctionannotation/main.nf" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -264,15 +267,15 @@ build_info: engine: "docker|native" output: "target/nextflow/rseqc/rseqc_junctionannotation" executable: "target/nextflow/rseqc/rseqc_junctionannotation/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -283,7 +286,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/rseqc/rseqc_junctionannotation/main.nf 
b/target/nextflow/rseqc/rseqc_junctionannotation/main.nf index 60e0235..122e8e9 100644 --- a/target/nextflow/rseqc/rseqc_junctionannotation/main.nf +++ b/target/nextflow/rseqc/rseqc_junctionannotation/main.nf @@ -1,6 +1,6 @@ // rseqc_junctionannotation main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs 
} +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3002,6 +3233,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3135,9 +3370,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/rseqc/rseqc_junctionannotation", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3145,7 +3380,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3164,7 +3399,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/rseqc/rseqc_junctionannotation/nextflow_schema.json b/target/nextflow/rseqc/rseqc_junctionannotation/nextflow_schema.json index d5d5891..1f8d4ee 100644 --- a/target/nextflow/rseqc/rseqc_junctionannotation/nextflow_schema.json +++ b/target/nextflow/rseqc/rseqc_junctionannotation/nextflow_schema.json @@ -69,10 +69,10 @@ "output_log": { "type": 
"string", - "description": "Type: `file`, default: `$id.$key.output_log.log`. output log of junction annotation script", - "help_text": "Type: `file`, default: `$id.$key.output_log.log`. output log of junction annotation script" + "description": "Type: `file`, default: `$id.junction_annotation.log`. output log of junction annotation script", + "help_text": "Type: `file`, default: `$id.junction_annotation.log`. output log of junction annotation script" , - "default":"$id.$key.output_log.log" + "default":"$id.junction_annotation.log" } @@ -80,10 +80,10 @@ "output_plot_r": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_plot_r.r`. r script to generate splice_junction and splice_events plot", - "help_text": "Type: `file`, default: `$id.$key.output_plot_r.r`. r script to generate splice_junction and splice_events plot" + "description": "Type: `file`, default: `$id.junction_annotation_plot.r`. r script to generate splice_junction and splice_events plot", + "help_text": "Type: `file`, default: `$id.junction_annotation_plot.r`. r script to generate splice_junction and splice_events plot" , - "default":"$id.$key.output_plot_r.r" + "default":"$id.junction_annotation_plot.r" } @@ -91,10 +91,10 @@ "output_junction_bed": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_junction_bed.bed`. junction annotation file (bed format)", - "help_text": "Type: `file`, default: `$id.$key.output_junction_bed.bed`. junction annotation file (bed format)" + "description": "Type: `file`, default: `$id.junction_annotation.bed`. junction annotation file (bed format)", + "help_text": "Type: `file`, default: `$id.junction_annotation.bed`. junction annotation file (bed format)" , - "default":"$id.$key.output_junction_bed.bed" + "default":"$id.junction_annotation.bed" } @@ -102,10 +102,10 @@ "output_junction_interact": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_junction_interact.bed`. 
interact file (bed format) of junctions", - "help_text": "Type: `file`, default: `$id.$key.output_junction_interact.bed`. interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization." + "description": "Type: `file`, default: `$id.junction_annotation.Interact.bed`. interact file (bed format) of junctions", + "help_text": "Type: `file`, default: `$id.junction_annotation.Interact.bed`. interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization." , - "default":"$id.$key.output_junction_interact.bed" + "default":"$id.junction_annotation.Interact.bed" } @@ -113,10 +113,10 @@ "output_junction_sheet": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_junction_sheet.xls`. junction annotation file (xls format)", - "help_text": "Type: `file`, default: `$id.$key.output_junction_sheet.xls`. junction annotation file (xls format)" + "description": "Type: `file`, default: `$id.junction_annotation.xls`. junction annotation file (xls format)", + "help_text": "Type: `file`, default: `$id.junction_annotation.xls`. junction annotation file (xls format)" , - "default":"$id.$key.output_junction_sheet.xls" + "default":"$id.junction_annotation.xls" } @@ -124,10 +124,10 @@ "output_splice_events_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_splice_events_plot.pdf`. plot of splice events (pdf)", - "help_text": "Type: `file`, default: `$id.$key.output_splice_events_plot.pdf`. plot of splice events (pdf)" + "description": "Type: `file`, default: `$id.splice_events.pdf`. plot of splice events (pdf)", + "help_text": "Type: `file`, default: `$id.splice_events.pdf`. 
plot of splice events (pdf)" , - "default":"$id.$key.output_splice_events_plot.pdf" + "default":"$id.splice_events.pdf" } @@ -135,10 +135,10 @@ "output_splice_junctions_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_splice_junctions_plot.pdf`. plot of junctions (pdf)", - "help_text": "Type: `file`, default: `$id.$key.output_splice_junctions_plot.pdf`. plot of junctions (pdf)" + "description": "Type: `file`, default: `$id.splice_junctions_plot.pdf`. plot of junctions (pdf)", + "help_text": "Type: `file`, default: `$id.splice_junctions_plot.pdf`. plot of junctions (pdf)" , - "default":"$id.$key.output_splice_junctions_plot.pdf" + "default":"$id.splice_junctions_plot.pdf" } diff --git a/target/nextflow/rseqc/rseqc_junctionsaturation/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_junctionsaturation/.config.vsh.yaml index 730be0b..6f4170b 100644 --- a/target/nextflow/rseqc/rseqc_junctionsaturation/.config.vsh.yaml +++ b/target/nextflow/rseqc/rseqc_junctionsaturation/.config.vsh.yaml @@ -146,6 +146,9 @@ info: paths: - "modules/nf-core/rseqc/junctionsaturation/main.nf" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -253,15 +256,15 @@ build_info: engine: "docker|native" output: "target/nextflow/rseqc/rseqc_junctionsaturation" executable: "target/nextflow/rseqc/rseqc_junctionsaturation/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -272,7 +275,7 @@ package_config: name: "craftbox" repo: "craftbox" 
tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/rseqc/rseqc_junctionsaturation/main.nf b/target/nextflow/rseqc/rseqc_junctionsaturation/main.nf index d4b9117..68cf6db 100644 --- a/target/nextflow/rseqc/rseqc_junctionsaturation/main.nf +++ b/target/nextflow/rseqc/rseqc_junctionsaturation/main.nf @@ -1,6 +1,6 @@ // rseqc_junctionsaturation main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in 
module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2987,6 +3218,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3120,9 +3355,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/rseqc/rseqc_junctionsaturation", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3130,7 +3365,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3149,7 +3384,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/rseqc/rseqc_junctionsaturation/nextflow_schema.json b/target/nextflow/rseqc/rseqc_junctionsaturation/nextflow_schema.json index f1e6368..ee7a48b 100644 --- a/target/nextflow/rseqc/rseqc_junctionsaturation/nextflow_schema.json +++ b/target/nextflow/rseqc/rseqc_junctionsaturation/nextflow_schema.json @@ -113,10 +113,10 @@ "output_plot_r": { 
"type": "string", - "description": "Type: `file`, default: `$id.$key.output_plot_r.r`. r script to generate junction_saturation_plot plot", - "help_text": "Type: `file`, default: `$id.$key.output_plot_r.r`. r script to generate junction_saturation_plot plot" + "description": "Type: `file`, default: `$id.junction_saturation_plot.r`. r script to generate junction_saturation_plot plot", + "help_text": "Type: `file`, default: `$id.junction_saturation_plot.r`. r script to generate junction_saturation_plot plot" , - "default":"$id.$key.output_plot_r.r" + "default":"$id.junction_saturation_plot.r" } @@ -124,10 +124,10 @@ "output_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_plot.pdf`. plot of junction saturation (pdf)", - "help_text": "Type: `file`, default: `$id.$key.output_plot.pdf`. plot of junction saturation (pdf)" + "description": "Type: `file`, default: `$id.junction_saturation_plot.pdf`. plot of junction saturation (pdf)", + "help_text": "Type: `file`, default: `$id.junction_saturation_plot.pdf`. 
plot of junction saturation (pdf)" , - "default":"$id.$key.output_plot.pdf" + "default":"$id.junction_saturation_plot.pdf" } diff --git a/target/nextflow/rseqc/rseqc_readdistribution/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_readdistribution/.config.vsh.yaml index 8bf4ffa..5a505ca 100644 --- a/target/nextflow/rseqc/rseqc_readdistribution/.config.vsh.yaml +++ b/target/nextflow/rseqc/rseqc_readdistribution/.config.vsh.yaml @@ -60,6 +60,9 @@ info: paths: - "modules/nf-core/rseqc/readdistribution/main.nf" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -166,15 +169,15 @@ build_info: engine: "docker|native" output: "target/nextflow/rseqc/rseqc_readdistribution" executable: "target/nextflow/rseqc/rseqc_readdistribution/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -185,7 +188,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/rseqc/rseqc_readdistribution/main.nf b/target/nextflow/rseqc/rseqc_readdistribution/main.nf index f54c84a..82c67ad 100644 --- a/target/nextflow/rseqc/rseqc_readdistribution/main.nf +++ b/target/nextflow/rseqc/rseqc_readdistribution/main.nf @@ -1,6 +1,6 @@ // rseqc_readdistribution main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2892,6 +3123,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3024,9 +3259,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/rseqc/rseqc_readdistribution", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3034,7 +3269,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3053,7 +3288,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/rseqc/rseqc_readdistribution/nextflow_schema.json b/target/nextflow/rseqc/rseqc_readdistribution/nextflow_schema.json index f53d310..25670b5 100644 --- a/target/nextflow/rseqc/rseqc_readdistribution/nextflow_schema.json +++ b/target/nextflow/rseqc/rseqc_readdistribution/nextflow_schema.json @@ -47,10 +47,10 @@ "output": { "type": "string", - 
"description": "Type: `file`, default: `$id.$key.output.txt`. output file (txt) of read distribution analysis", - "help_text": "Type: `file`, default: `$id.$key.output.txt`. output file (txt) of read distribution analysis." + "description": "Type: `file`, default: `$id.read_distribution.txt`. output file (txt) of read distribution analysis", + "help_text": "Type: `file`, default: `$id.read_distribution.txt`. output file (txt) of read distribution analysis." , - "default":"$id.$key.output.txt" + "default":"$id.read_distribution.txt" } diff --git a/target/nextflow/rseqc/rseqc_readduplication/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_readduplication/.config.vsh.yaml index a0bc18d..b50fdcb 100644 --- a/target/nextflow/rseqc/rseqc_readduplication/.config.vsh.yaml +++ b/target/nextflow/rseqc/rseqc_readduplication/.config.vsh.yaml @@ -108,6 +108,9 @@ info: paths: - "modules/nf-core/rseqc/readduplication/main.nf" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -215,15 +218,15 @@ build_info: engine: "docker|native" output: "target/nextflow/rseqc/rseqc_readduplication" executable: "target/nextflow/rseqc/rseqc_readduplication/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -234,7 +237,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/rseqc/rseqc_readduplication/main.nf 
b/target/nextflow/rseqc/rseqc_readduplication/main.nf index ed19510..464befb 100644 --- a/target/nextflow/rseqc/rseqc_readduplication/main.nf +++ b/target/nextflow/rseqc/rseqc_readduplication/main.nf @@ -1,6 +1,6 @@ // rseqc_readduplication main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void 
_checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2945,6 +3176,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3078,9 +3313,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/rseqc/rseqc_readduplication", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3088,7 +3323,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3107,7 +3342,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/rseqc/rseqc_readduplication/nextflow_schema.json b/target/nextflow/rseqc/rseqc_readduplication/nextflow_schema.json index 9dbcbb6..3217c51 100644 --- a/target/nextflow/rseqc/rseqc_readduplication/nextflow_schema.json +++ b/target/nextflow/rseqc/rseqc_readduplication/nextflow_schema.json @@ -59,10 +59,10 @@ "output_duplication_rate_plot_r": { 
"type": "string", - "description": "Type: `file`, default: `$id.$key.output_duplication_rate_plot_r.r`. R script for generating duplication rate plot", - "help_text": "Type: `file`, default: `$id.$key.output_duplication_rate_plot_r.r`. R script for generating duplication rate plot" + "description": "Type: `file`, default: `$id.duplication_rate_plot.r`. R script for generating duplication rate plot", + "help_text": "Type: `file`, default: `$id.duplication_rate_plot.r`. R script for generating duplication rate plot" , - "default":"$id.$key.output_duplication_rate_plot_r.r" + "default":"$id.duplication_rate_plot.r" } @@ -70,10 +70,10 @@ "output_duplication_rate_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_duplication_rate_plot.pdf`. duplication rate plot (pdf)", - "help_text": "Type: `file`, default: `$id.$key.output_duplication_rate_plot.pdf`. duplication rate plot (pdf)" + "description": "Type: `file`, default: `$id.duplication_rate_plot.pdf`. duplication rate plot (pdf)", + "help_text": "Type: `file`, default: `$id.duplication_rate_plot.pdf`. duplication rate plot (pdf)" , - "default":"$id.$key.output_duplication_rate_plot.pdf" + "default":"$id.duplication_rate_plot.pdf" } @@ -81,10 +81,10 @@ "output_duplication_rate_mapping": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_duplication_rate_mapping.xls`. Summary of mapping-based read duplication", - "help_text": "Type: `file`, default: `$id.$key.output_duplication_rate_mapping.xls`. Summary of mapping-based read duplication" + "description": "Type: `file`, default: `$id.duplication_rate_mapping.xls`. Summary of mapping-based read duplication", + "help_text": "Type: `file`, default: `$id.duplication_rate_mapping.xls`. 
Summary of mapping-based read duplication" , - "default":"$id.$key.output_duplication_rate_mapping.xls" + "default":"$id.duplication_rate_mapping.xls" } @@ -92,10 +92,10 @@ "output_duplication_rate_sequence": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_duplication_rate_sequence.xls`. Summary of sequencing-based read duplication", - "help_text": "Type: `file`, default: `$id.$key.output_duplication_rate_sequence.xls`. Summary of sequencing-based read duplication" + "description": "Type: `file`, default: `$id.duplication_rate_sequencing.xls`. Summary of sequencing-based read duplication", + "help_text": "Type: `file`, default: `$id.duplication_rate_sequencing.xls`. Summary of sequencing-based read duplication" , - "default":"$id.$key.output_duplication_rate_sequence.xls" + "default":"$id.duplication_rate_sequencing.xls" } diff --git a/target/nextflow/rseqc/rseqc_tin/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_tin/.config.vsh.yaml index 13b5030..877c54e 100644 --- a/target/nextflow/rseqc/rseqc_tin/.config.vsh.yaml +++ b/target/nextflow/rseqc/rseqc_tin/.config.vsh.yaml @@ -114,6 +114,9 @@ info: paths: - "modules/nf-core/rseqc/tin/main.nf" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -218,15 +221,15 @@ build_info: engine: "docker|native" output: "target/nextflow/rseqc/rseqc_tin" executable: "target/nextflow/rseqc/rseqc_tin/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -237,7 +240,7 @@ 
package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/rseqc/rseqc_tin/main.nf b/target/nextflow/rseqc/rseqc_tin/main.nf index f4d00b8..bf8df0a 100644 --- a/target/nextflow/rseqc/rseqc_tin/main.nf +++ b/target/nextflow/rseqc/rseqc_tin/main.nf @@ -1,6 +1,6 @@ // rseqc_tin main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': 
'${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2953,6 +3184,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3083,9 +3318,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/rseqc/rseqc_tin", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3093,7 +3328,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3112,7 +3347,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/rseqc/rseqc_tin/nextflow_schema.json b/target/nextflow/rseqc/rseqc_tin/nextflow_schema.json index 0039ad0..becf8ff 100644 --- a/target/nextflow/rseqc/rseqc_tin/nextflow_schema.json +++ b/target/nextflow/rseqc/rseqc_tin/nextflow_schema.json @@ -90,10 +90,10 @@ "output_tin_summary": { "type": "string", - "description": "Type: `file`, default: 
`$id.$key.output_tin_summary.txt`. summary statistics (txt) of calculated TIN metrics", - "help_text": "Type: `file`, default: `$id.$key.output_tin_summary.txt`. summary statistics (txt) of calculated TIN metrics" + "description": "Type: `file`, default: `$id.tin_summary.txt`. summary statistics (txt) of calculated TIN metrics", + "help_text": "Type: `file`, default: `$id.tin_summary.txt`. summary statistics (txt) of calculated TIN metrics" , - "default":"$id.$key.output_tin_summary.txt" + "default":"$id.tin_summary.txt" } @@ -101,10 +101,10 @@ "output_tin": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_tin.xls`. file with TIN metrics (xls)", - "help_text": "Type: `file`, default: `$id.$key.output_tin.xls`. file with TIN metrics (xls)" + "description": "Type: `file`, default: `$id.tin.xls`. file with TIN metrics (xls)", + "help_text": "Type: `file`, default: `$id.tin.xls`. file with TIN metrics (xls)" , - "default":"$id.$key.output_tin.xls" + "default":"$id.tin.xls" } diff --git a/target/nextflow/sortmerna/.config.vsh.yaml b/target/nextflow/sortmerna/.config.vsh.yaml index 925ae25..411909b 100644 --- a/target/nextflow/sortmerna/.config.vsh.yaml +++ b/target/nextflow/sortmerna/.config.vsh.yaml @@ -100,6 +100,9 @@ info: - "modules/nf-core/sortmerna/meta.yml" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -196,15 +199,15 @@ build_info: engine: "docker|native" output: "target/nextflow/sortmerna" executable: "target/nextflow/sortmerna/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - 
path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -215,7 +218,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/sortmerna/main.nf b/target/nextflow/sortmerna/main.nf index 965d599..cf0cc3e 100644 --- a/target/nextflow/sortmerna/main.nf +++ b/target/nextflow/sortmerna/main.nf @@ -1,6 +1,6 @@ // sortmerna main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = 
config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2934,6 +3165,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3049,9 +3284,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/sortmerna", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3059,7 +3294,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3078,7 +3313,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/sortmerna/nextflow_schema.json b/target/nextflow/sortmerna/nextflow_schema.json index c7fb0fe..4f376dc 100644 --- a/target/nextflow/sortmerna/nextflow_schema.json +++ b/target/nextflow/sortmerna/nextflow_schema.json @@ -57,10 +57,10 @@ "sortmerna_log": { "type": "string", - "description": "Type: `file`, default: `$id.$key.sortmerna_log.log`. 
Sortmerna log file", - "help_text": "Type: `file`, default: `$id.$key.sortmerna_log.log`. Sortmerna log file." + "description": "Type: `file`, default: `$id.sortmerna.log`. Sortmerna log file", + "help_text": "Type: `file`, default: `$id.sortmerna.log`. Sortmerna log file." , - "default":"$id.$key.sortmerna_log.log" + "default":"$id.sortmerna.log" } @@ -68,10 +68,10 @@ "fastq_1": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.fastq_1.gz`. Output file for read 1", - "help_text": "Type: `file`, required, default: `$id.$key.fastq_1.gz`. Output file for read 1." + "description": "Type: `file`, required, default: `$id.$key.read_1.fastq.gz`. Output file for read 1", + "help_text": "Type: `file`, required, default: `$id.$key.read_1.fastq.gz`. Output file for read 1." , - "default":"$id.$key.fastq_1.gz" + "default":"$id.$key.read_1.fastq.gz" } @@ -79,10 +79,10 @@ "fastq_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fastq_2.gz`. Output file for read 2", - "help_text": "Type: `file`, default: `$id.$key.fastq_2.gz`. Output file for read 2." + "description": "Type: `file`, default: `$id.$key.read_2.fastq.gz`. Output file for read 2", + "help_text": "Type: `file`, default: `$id.$key.read_2.fastq.gz`. Output file for read 2." 
, - "default":"$id.$key.fastq_2.gz" + "default":"$id.$key.read_2.fastq.gz" } diff --git a/target/nextflow/stringtie/.config.vsh.yaml b/target/nextflow/stringtie/.config.vsh.yaml index 57c04ca..da12f47 100644 --- a/target/nextflow/stringtie/.config.vsh.yaml +++ b/target/nextflow/stringtie/.config.vsh.yaml @@ -117,6 +117,9 @@ info: - "modules/nf-core/stringtie/stringtie/meta.yml" last_sha: "55398de6ab7577acfe9b1180016a93d7af7eb859" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -220,15 +223,15 @@ build_info: engine: "docker|native" output: "target/nextflow/stringtie" executable: "target/nextflow/stringtie/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -239,7 +242,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/stringtie/main.nf b/target/nextflow/stringtie/main.nf index 71b973e..baf1e8b 100644 --- a/target/nextflow/stringtie/main.nf +++ b/target/nextflow/stringtie/main.nf @@ -1,6 +1,6 @@ // stringtie main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. 
// @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def 
items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2957,6 +3188,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3080,9 +3315,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/stringtie", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3090,7 +3325,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3109,7 +3344,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/stringtie/nextflow_schema.json b/target/nextflow/stringtie/nextflow_schema.json index aeb01f6..484111a 100644 --- a/target/nextflow/stringtie/nextflow_schema.json +++ b/target/nextflow/stringtie/nextflow_schema.json @@ -77,10 +77,10 @@ "transcript_gtf": { "type": "string", - "description": "Type: `file`, default: `$id.$key.transcript_gtf.gtf`. 
", - "help_text": "Type: `file`, default: `$id.$key.transcript_gtf.gtf`. " + "description": "Type: `file`, default: `$id.$key.transcripts.gtf`. ", + "help_text": "Type: `file`, default: `$id.$key.transcripts.gtf`. " , - "default":"$id.$key.transcript_gtf.gtf" + "default":"$id.$key.transcripts.gtf" } @@ -88,10 +88,10 @@ "coverage_gtf": { "type": "string", - "description": "Type: `file`, default: `$id.$key.coverage_gtf.gtf`. ", - "help_text": "Type: `file`, default: `$id.$key.coverage_gtf.gtf`. " + "description": "Type: `file`, default: `$id.$key.coverage.gtf`. ", + "help_text": "Type: `file`, default: `$id.$key.coverage.gtf`. " , - "default":"$id.$key.coverage_gtf.gtf" + "default":"$id.$key.coverage.gtf" } @@ -110,10 +110,10 @@ "ballgown": { "type": "string", - "description": "Type: `file`, default: `$id.$key.ballgown.ballgown`. for running ballgown", - "help_text": "Type: `file`, default: `$id.$key.ballgown.ballgown`. for running ballgown" + "description": "Type: `file`, default: `$id.$key.ballgown`. for running ballgown", + "help_text": "Type: `file`, default: `$id.$key.ballgown`. 
for running ballgown" , - "default":"$id.$key.ballgown.ballgown" + "default":"$id.$key.ballgown" } diff --git a/target/nextflow/summarizedexperiment/.config.vsh.yaml b/target/nextflow/summarizedexperiment/.config.vsh.yaml index d796168..4eb6bca 100644 --- a/target/nextflow/summarizedexperiment/.config.vsh.yaml +++ b/target/nextflow/summarizedexperiment/.config.vsh.yaml @@ -96,6 +96,9 @@ info: - "modules/local/summarizedexperiment/main.nf" last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -178,21 +181,17 @@ runners: engines: - type: "docker" id: "docker" - image: "ubuntu:22.04" + image: "rocker/r2u:22.04" target_registry: "images.viash-hub.com" target_tag: "main" namespace_separator: "/" setup: - - type: "apt" - packages: - - "r-base" - - "libcurl4-openssl-dev" - interactive: false - type: "r" bioc: - "SummarizedExperiment" - "tximeta" bioc_force_install: false + warnings_as_errors: true entrypoint: [] cmd: null - type: "native" @@ -203,15 +202,15 @@ build_info: engine: "docker|native" output: "target/nextflow/summarizedexperiment" executable: "target/nextflow/summarizedexperiment/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -222,7 +221,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/summarizedexperiment/main.nf 
b/target/nextflow/summarizedexperiment/main.nf index 25faa99..5f63199 100644 --- a/target/nextflow/summarizedexperiment/main.nf +++ b/target/nextflow/summarizedexperiment/main.nf @@ -1,6 +1,6 @@ // summarizedexperiment main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void 
_checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2928,6 +3159,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3028,26 +3263,19 @@ meta = [ { "type" : "docker", "id" : "docker", - "image" : "ubuntu:22.04", + "image" : "rocker/r2u:22.04", "target_registry" : "images.viash-hub.com", "target_tag" : "main", "namespace_separator" : "/", "setup" : [ - { - "type" : "apt", - "packages" : [ - "r-base", - "libcurl4-openssl-dev" - ], - "interactive" : false - }, { "type" : "r", "bioc" : [ "SummarizedExperiment", "tximeta" ], - "bioc_force_install" : false + "bioc_force_install" : false, + "warnings_as_errors" : true } ] }, @@ -3061,9 +3289,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/summarizedexperiment", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3071,7 +3299,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3090,7 +3318,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : 
"0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/summarizedexperiment/nextflow_schema.json b/target/nextflow/summarizedexperiment/nextflow_schema.json index fb1515b..c185c97 100644 --- a/target/nextflow/summarizedexperiment/nextflow_schema.json +++ b/target/nextflow/summarizedexperiment/nextflow_schema.json @@ -97,10 +97,10 @@ "output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output.output`. ", - "help_text": "Type: `file`, default: `$id.$key.output.output`. " + "description": "Type: `file`, default: `merged_summarizedexperiment`. ", + "help_text": "Type: `file`, default: `merged_summarizedexperiment`. " , - "default":"$id.$key.output.output" + "default":"merged_summarizedexperiment" } diff --git a/target/nextflow/tx2gene/.config.vsh.yaml b/target/nextflow/tx2gene/.config.vsh.yaml index dcb3be9..d616abb 100644 --- a/target/nextflow/tx2gene/.config.vsh.yaml +++ b/target/nextflow/tx2gene/.config.vsh.yaml @@ -84,6 +84,9 @@ info: - "modules/local/tx2gene/main.nf" last_sha: "839ac5cab892504514cc96d44e99e70516b239d2" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -189,15 +192,15 @@ build_info: engine: "docker|native" output: "target/nextflow/tx2gene" executable: "target/nextflow/tx2gene/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -208,7 +211,7 @@ package_config: name: "craftbox" repo: 
"craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/tx2gene/main.nf b/target/nextflow/tx2gene/main.nf index 5135fa5..036750c 100644 --- a/target/nextflow/tx2gene/main.nf +++ b/target/nextflow/tx2gene/main.nf @@ -1,6 +1,6 @@ // tx2gene main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map 
_processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2920,6 +3151,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3050,9 +3285,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/tx2gene", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3060,7 +3295,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3079,7 +3314,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/tx2gene/nextflow_schema.json b/target/nextflow/tx2gene/nextflow_schema.json index 6de165a..744cda0 100644 --- a/target/nextflow/tx2gene/nextflow_schema.json +++ b/target/nextflow/tx2gene/nextflow_schema.json @@ -81,10 +81,10 @@ "tsv": { "type": "string", - "description": "Type: `file`, default: `$id.$key.tsv.tsv`. 
", - "help_text": "Type: `file`, default: `$id.$key.tsv.tsv`. " + "description": "Type: `file`, default: `tx2gene.tsv`. ", + "help_text": "Type: `file`, default: `tx2gene.tsv`. " , - "default":"$id.$key.tsv.tsv" + "default":"tx2gene.tsv" } diff --git a/target/nextflow/tximport/.config.vsh.yaml b/target/nextflow/tximport/.config.vsh.yaml index 6400329..1af1c62 100644 --- a/target/nextflow/tximport/.config.vsh.yaml +++ b/target/nextflow/tximport/.config.vsh.yaml @@ -143,6 +143,9 @@ info: - "modules/local/tximport/main.nf" last_sha: "489bcb4efdc7bd58839b22b0360d26b4d80b87a8" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -245,6 +248,7 @@ engines: - "tximport" - "tximeta" bioc_force_install: false + warnings_as_errors: true entrypoint: [] cmd: null - type: "native" @@ -255,15 +259,15 @@ build_info: engine: "docker|native" output: "target/nextflow/tximport" executable: "target/nextflow/tximport/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -274,7 +278,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/tximport/main.nf b/target/nextflow/tximport/main.nf index f09513f..e81faf7 100644 --- a/target/nextflow/tximport/main.nf +++ b/target/nextflow/tximport/main.nf @@ -1,6 +1,6 @@ // tximport main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper 
script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument 
'${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2989,6 +3220,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3114,7 +3349,8 @@ meta = [ "tximport", "tximeta" ], - "bioc_force_install" : false + "bioc_force_install" : false, + "warnings_as_errors" : true } ] }, @@ -3128,9 +3364,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/tximport", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3138,7 +3374,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3157,7 +3393,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/tximport/nextflow_schema.json b/target/nextflow/tximport/nextflow_schema.json index 4298e85..8c42a1d 100644 --- a/target/nextflow/tximport/nextflow_schema.json +++ b/target/nextflow/tximport/nextflow_schema.json 
@@ -59,10 +59,10 @@ "tpm_gene": { "type": "string", - "description": "Type: `file`, default: `$id.$key.tpm_gene.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.tpm_gene.tsv`. " + "description": "Type: `file`, default: `merged.gene_tpm.tsv`. ", + "help_text": "Type: `file`, default: `merged.gene_tpm.tsv`. " , - "default":"$id.$key.tpm_gene.tsv" + "default":"merged.gene_tpm.tsv" } @@ -70,10 +70,10 @@ "counts_gene": { "type": "string", - "description": "Type: `file`, default: `$id.$key.counts_gene.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.counts_gene.tsv`. " + "description": "Type: `file`, default: `merged.gene_counts.tsv`. ", + "help_text": "Type: `file`, default: `merged.gene_counts.tsv`. " , - "default":"$id.$key.counts_gene.tsv" + "default":"merged.gene_counts.tsv" } @@ -81,10 +81,10 @@ "counts_gene_length_scaled": { "type": "string", - "description": "Type: `file`, default: `$id.$key.counts_gene_length_scaled.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.counts_gene_length_scaled.tsv`. " + "description": "Type: `file`, default: `merged.gene_counts_length_scaled.tsv`. ", + "help_text": "Type: `file`, default: `merged.gene_counts_length_scaled.tsv`. " , - "default":"$id.$key.counts_gene_length_scaled.tsv" + "default":"merged.gene_counts_length_scaled.tsv" } @@ -92,10 +92,10 @@ "counts_gene_scaled": { "type": "string", - "description": "Type: `file`, default: `$id.$key.counts_gene_scaled.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.counts_gene_scaled.tsv`. " + "description": "Type: `file`, default: `merged.gene_counts_scaled.tsv`. ", + "help_text": "Type: `file`, default: `merged.gene_counts_scaled.tsv`. " , - "default":"$id.$key.counts_gene_scaled.tsv" + "default":"merged.gene_counts_scaled.tsv" } @@ -103,10 +103,10 @@ "lengths_gene": { "type": "string", - "description": "Type: `file`, default: `$id.$key.lengths_gene.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.lengths_gene.tsv`. 
" + "description": "Type: `file`, default: `merged.gene_lengths.tsv`. ", + "help_text": "Type: `file`, default: `merged.gene_lengths.tsv`. " , - "default":"$id.$key.lengths_gene.tsv" + "default":"merged.gene_lengths.tsv" } @@ -114,10 +114,10 @@ "tpm_transcript": { "type": "string", - "description": "Type: `file`, default: `$id.$key.tpm_transcript.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.tpm_transcript.tsv`. " + "description": "Type: `file`, default: `merged.transcript_tpm.tsv`. ", + "help_text": "Type: `file`, default: `merged.transcript_tpm.tsv`. " , - "default":"$id.$key.tpm_transcript.tsv" + "default":"merged.transcript_tpm.tsv" } @@ -125,10 +125,10 @@ "counts_transcript": { "type": "string", - "description": "Type: `file`, default: `$id.$key.counts_transcript.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.counts_transcript.tsv`. " + "description": "Type: `file`, default: `merged.transcript_counts.tsv`. ", + "help_text": "Type: `file`, default: `merged.transcript_counts.tsv`. " , - "default":"$id.$key.counts_transcript.tsv" + "default":"merged.transcript_counts.tsv" } @@ -136,10 +136,10 @@ "lengths_transcript": { "type": "string", - "description": "Type: `file`, default: `$id.$key.lengths_transcript.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.lengths_transcript.tsv`. " + "description": "Type: `file`, default: `merged.transcript_lengths.tsv`. ", + "help_text": "Type: `file`, default: `merged.transcript_lengths.tsv`. 
" , - "default":"$id.$key.lengths_transcript.tsv" + "default":"merged.transcript_lengths.tsv" } diff --git a/target/nextflow/ucsc/bedclip/.config.vsh.yaml b/target/nextflow/ucsc/bedclip/.config.vsh.yaml index adff639..d6b24cd 100644 --- a/target/nextflow/ucsc/bedclip/.config.vsh.yaml +++ b/target/nextflow/ucsc/bedclip/.config.vsh.yaml @@ -62,6 +62,9 @@ info: - "modules/nf-core/ucsc/bedclip/meta.yml" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -168,15 +171,15 @@ build_info: engine: "docker|native" output: "target/nextflow/ucsc/bedclip" executable: "target/nextflow/ucsc/bedclip/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -187,7 +190,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/ucsc/bedclip/main.nf b/target/nextflow/ucsc/bedclip/main.nf index fccf61b..b70f4e6 100644 --- a/target/nextflow/ucsc/bedclip/main.nf +++ b/target/nextflow/ucsc/bedclip/main.nf @@ -1,6 +1,6 @@ // bedclip main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. 
// @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def 
items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2894,6 +3125,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3025,9 +3260,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/ucsc/bedclip", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3035,7 +3270,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3054,7 +3289,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/ucsc/bedclip/nextflow_schema.json b/target/nextflow/ucsc/bedclip/nextflow_schema.json index 456033f..61efb2a 100644 --- a/target/nextflow/ucsc/bedclip/nextflow_schema.json +++ b/target/nextflow/ucsc/bedclip/nextflow_schema.json @@ -47,10 +47,10 @@ "output_bedgraph": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_bedgraph.bedgraph`. 
bedGraph file after clipping", - "help_text": "Type: `file`, default: `$id.$key.output_bedgraph.bedgraph`. bedGraph file after clipping" + "description": "Type: `file`, default: `$id.$key.bedgraph`. bedGraph file after clipping", + "help_text": "Type: `file`, default: `$id.$key.bedgraph`. bedGraph file after clipping" , - "default":"$id.$key.output_bedgraph.bedgraph" + "default":"$id.$key.bedgraph" } diff --git a/target/nextflow/ucsc/bedgraphtobigwig/.config.vsh.yaml b/target/nextflow/ucsc/bedgraphtobigwig/.config.vsh.yaml index 26ce089..0d6a381 100644 --- a/target/nextflow/ucsc/bedgraphtobigwig/.config.vsh.yaml +++ b/target/nextflow/ucsc/bedgraphtobigwig/.config.vsh.yaml @@ -62,6 +62,9 @@ info: - "modules/nf-core/ucsc/bedgraphtobigwig/meta.yml" last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -168,15 +171,15 @@ build_info: engine: "docker|native" output: "target/nextflow/ucsc/bedgraphtobigwig" executable: "target/nextflow/ucsc/bedgraphtobigwig/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -187,7 +190,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/ucsc/bedgraphtobigwig/main.nf b/target/nextflow/ucsc/bedgraphtobigwig/main.nf index 4308aa4..299b1f8 100644 --- a/target/nextflow/ucsc/bedgraphtobigwig/main.nf +++ 
b/target/nextflow/ucsc/bedgraphtobigwig/main.nf @@ -1,6 +1,6 @@ // bedgraphtobigwig main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if 
(arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2894,6 +3125,10 @@ meta = [ } }, "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3025,9 +3260,9 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/ucsc/bedgraphtobigwig", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3035,7 +3270,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3054,7 +3289,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/ucsc/bedgraphtobigwig/nextflow_schema.json b/target/nextflow/ucsc/bedgraphtobigwig/nextflow_schema.json index 1798946..1610798 100644 --- a/target/nextflow/ucsc/bedgraphtobigwig/nextflow_schema.json +++ b/target/nextflow/ucsc/bedgraphtobigwig/nextflow_schema.json @@ -47,10 +47,10 @@ "bigwig": { "type": "string", - "description": "Type: `file`, default: 
`$id.$key.bigwig.bigwig`. bigWig coverage file relative to genes on the input file", - "help_text": "Type: `file`, default: `$id.$key.bigwig.bigwig`. bigWig coverage file relative to genes on the input file" + "description": "Type: `file`, default: `$id.$key.bigwig`. bigWig coverage file relative to genes on the input file", + "help_text": "Type: `file`, default: `$id.$key.bigwig`. bigWig coverage file relative to genes on the input file" , - "default":"$id.$key.bigwig.bigwig" + "default":"$id.$key.bigwig" } diff --git a/target/nextflow/workflows/genome_alignment_and_quant/.config.vsh.yaml b/target/nextflow/workflows/genome_alignment_and_quant/.config.vsh.yaml index 4d74276..b117728 100644 --- a/target/nextflow/workflows/genome_alignment_and_quant/.config.vsh.yaml +++ b/target/nextflow/workflows/genome_alignment_and_quant/.config.vsh.yaml @@ -455,6 +455,9 @@ description: "A viash sub-workflow for genome alignment and quantification stage \ nf-core/rnaseq pipeline.\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -594,9 +597,9 @@ build_info: engine: "native" output: "target/nextflow/workflows/genome_alignment_and_quant" executable: "target/nextflow/workflows/genome_alignment_and_quant/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_align_reads" - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort" @@ -613,7 +616,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -624,7 +627,7 @@ 
package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/workflows/genome_alignment_and_quant/main.nf b/target/nextflow/workflows/genome_alignment_and_quant/main.nf index 772405c..bedd332 100644 --- a/target/nextflow/workflows/genome_alignment_and_quant/main.nf +++ b/target/nextflow/workflows/genome_alignment_and_quant/main.nf @@ -1,6 +1,6 @@ // genome_alignment_and_quant main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { 
it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3334,6 +3565,10 @@ meta = [ ], "description" : "A viash sub-workflow for genome alignment and quantification stage of nf-core/rnaseq pipeline.\n", "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3523,9 +3758,9 @@ meta = [ "runner" : "nextflow", "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/genome_alignment_and_quant", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3533,7 +3768,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3552,7 +3787,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/workflows/genome_alignment_and_quant/nextflow_schema.json b/target/nextflow/workflows/genome_alignment_and_quant/nextflow_schema.json index e52eb86..7c5a26c 100644 --- 
a/target/nextflow/workflows/genome_alignment_and_quant/nextflow_schema.json +++ b/target/nextflow/workflows/genome_alignment_and_quant/nextflow_schema.json @@ -239,10 +239,10 @@ "star_multiqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.star_multiqc.log`. ", - "help_text": "Type: `file`, default: `$id.$key.star_multiqc.log`. " + "description": "Type: `file`, default: `$id_star.log`. ", + "help_text": "Type: `file`, default: `$id_star.log`. " , - "default":"$id.$key.star_multiqc.log" + "default":"$id_star.log" } @@ -250,10 +250,10 @@ "genome_bam_sorted": { "type": "string", - "description": "Type: `file`, default: `$id.$key.genome_bam_sorted.bam`. ", - "help_text": "Type: `file`, default: `$id.$key.genome_bam_sorted.bam`. " + "description": "Type: `file`, default: `$id.genome.bam`. ", + "help_text": "Type: `file`, default: `$id.genome.bam`. " , - "default":"$id.$key.genome_bam_sorted.bam" + "default":"$id.genome.bam" } @@ -261,10 +261,10 @@ "genome_bam_index": { "type": "string", - "description": "Type: `file`, default: `$id.$key.genome_bam_index.bai`. ", - "help_text": "Type: `file`, default: `$id.$key.genome_bam_index.bai`. " + "description": "Type: `file`, default: `$id.genome.bam.bai`. ", + "help_text": "Type: `file`, default: `$id.genome.bam.bai`. " , - "default":"$id.$key.genome_bam_index.bai" + "default":"$id.genome.bam.bai" } @@ -272,10 +272,10 @@ "genome_bam_stats": { "type": "string", - "description": "Type: `file`, default: `$id.$key.genome_bam_stats.stats`. ", - "help_text": "Type: `file`, default: `$id.$key.genome_bam_stats.stats`. " + "description": "Type: `file`, default: `$id.genome.stats`. ", + "help_text": "Type: `file`, default: `$id.genome.stats`. " , - "default":"$id.$key.genome_bam_stats.stats" + "default":"$id.genome.stats" } @@ -283,10 +283,10 @@ "genome_bam_flagstat": { "type": "string", - "description": "Type: `file`, default: `$id.$key.genome_bam_flagstat.flagstat`. 
", - "help_text": "Type: `file`, default: `$id.$key.genome_bam_flagstat.flagstat`. " + "description": "Type: `file`, default: `$id.genome.flagstat`. ", + "help_text": "Type: `file`, default: `$id.genome.flagstat`. " , - "default":"$id.$key.genome_bam_flagstat.flagstat" + "default":"$id.genome.flagstat" } @@ -294,10 +294,10 @@ "genome_bam_idxstats": { "type": "string", - "description": "Type: `file`, default: `$id.$key.genome_bam_idxstats.idxstats`. ", - "help_text": "Type: `file`, default: `$id.$key.genome_bam_idxstats.idxstats`. " + "description": "Type: `file`, default: `$id.genome.idxstats`. ", + "help_text": "Type: `file`, default: `$id.genome.idxstats`. " , - "default":"$id.$key.genome_bam_idxstats.idxstats" + "default":"$id.genome.idxstats" } @@ -305,10 +305,10 @@ "transcriptome_bam": { "type": "string", - "description": "Type: `file`, default: `$id.$key.transcriptome_bam.bam`. ", - "help_text": "Type: `file`, default: `$id.$key.transcriptome_bam.bam`. " + "description": "Type: `file`, default: `$id.transcriptome.bam`. ", + "help_text": "Type: `file`, default: `$id.transcriptome.bam`. " , - "default":"$id.$key.transcriptome_bam.bam" + "default":"$id.transcriptome.bam" } @@ -316,10 +316,10 @@ "transcriptome_bam_index": { "type": "string", - "description": "Type: `file`, default: `$id.$key.transcriptome_bam_index.bai`. ", - "help_text": "Type: `file`, default: `$id.$key.transcriptome_bam_index.bai`. " + "description": "Type: `file`, default: `$id.transcriptome.bam.bai`. ", + "help_text": "Type: `file`, default: `$id.transcriptome.bam.bai`. " , - "default":"$id.$key.transcriptome_bam_index.bai" + "default":"$id.transcriptome.bam.bai" } @@ -327,10 +327,10 @@ "transcriptome_bam_stats": { "type": "string", - "description": "Type: `file`, default: `$id.$key.transcriptome_bam_stats.stats`. ", - "help_text": "Type: `file`, default: `$id.$key.transcriptome_bam_stats.stats`. " + "description": "Type: `file`, default: `$id.transcriptome.stats`. 
", + "help_text": "Type: `file`, default: `$id.transcriptome.stats`. " , - "default":"$id.$key.transcriptome_bam_stats.stats" + "default":"$id.transcriptome.stats" } @@ -338,10 +338,10 @@ "transcriptome_bam_flagstat": { "type": "string", - "description": "Type: `file`, default: `$id.$key.transcriptome_bam_flagstat.flagstat`. ", - "help_text": "Type: `file`, default: `$id.$key.transcriptome_bam_flagstat.flagstat`. " + "description": "Type: `file`, default: `$id.transcriptome.flagstat`. ", + "help_text": "Type: `file`, default: `$id.transcriptome.flagstat`. " , - "default":"$id.$key.transcriptome_bam_flagstat.flagstat" + "default":"$id.transcriptome.flagstat" } @@ -349,10 +349,10 @@ "transcriptome_bam_idxstats": { "type": "string", - "description": "Type: `file`, default: `$id.$key.transcriptome_bam_idxstats.idxstats`. ", - "help_text": "Type: `file`, default: `$id.$key.transcriptome_bam_idxstats.idxstats`. " + "description": "Type: `file`, default: `$id.transcriptome.idxstats`. ", + "help_text": "Type: `file`, default: `$id.transcriptome.idxstats`. " , - "default":"$id.$key.transcriptome_bam_idxstats.idxstats" + "default":"$id.transcriptome.idxstats" } @@ -360,10 +360,10 @@ "quant_out_dir": { "type": "string", - "description": "Type: `file`, default: `$id.$key.quant_out_dir.salmon_quant`. ", - "help_text": "Type: `file`, default: `$id.$key.quant_out_dir.salmon_quant`. " + "description": "Type: `file`, default: `$id.salmon_quant`. ", + "help_text": "Type: `file`, default: `$id.salmon_quant`. " , - "default":"$id.$key.quant_out_dir.salmon_quant" + "default":"$id.salmon_quant" } @@ -371,10 +371,10 @@ "quant_results_file": { "type": "string", - "description": "Type: `file`, default: `$id.$key.quant_results_file.sf`. ", - "help_text": "Type: `file`, default: `$id.$key.quant_results_file.sf`. " + "description": "Type: `file`, default: `$id.quant.sf`. ", + "help_text": "Type: `file`, default: `$id.quant.sf`. 
" , - "default":"$id.$key.quant_results_file.sf" + "default":"$id.quant.sf" } @@ -382,10 +382,10 @@ "salmon_multiqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.salmon_multiqc.salmon_multiqc`. ", - "help_text": "Type: `file`, default: `$id.$key.salmon_multiqc.salmon_multiqc`. " + "description": "Type: `file`, default: `$id.$key.salmon_multiqc`. ", + "help_text": "Type: `file`, default: `$id.$key.salmon_multiqc`. " , - "default":"$id.$key.salmon_multiqc.salmon_multiqc" + "default":"$id.$key.salmon_multiqc" } @@ -393,10 +393,10 @@ "rsem_counts_gene": { "type": "string", - "description": "Type: `file`, default: `$id.$key.rsem_counts_gene.results`. Expression counts on gene level", - "help_text": "Type: `file`, default: `$id.$key.rsem_counts_gene.results`. Expression counts on gene level" + "description": "Type: `file`, default: `$id.genes.results`. Expression counts on gene level", + "help_text": "Type: `file`, default: `$id.genes.results`. Expression counts on gene level" , - "default":"$id.$key.rsem_counts_gene.results" + "default":"$id.genes.results" } @@ -404,10 +404,10 @@ "counts_transcripts": { "type": "string", - "description": "Type: `file`, default: `$id.$key.counts_transcripts.results`. Expression counts on transcript level", - "help_text": "Type: `file`, default: `$id.$key.counts_transcripts.results`. Expression counts on transcript level" + "description": "Type: `file`, default: `$id.isoforms.results`. Expression counts on transcript level", + "help_text": "Type: `file`, default: `$id.isoforms.results`. Expression counts on transcript level" , - "default":"$id.$key.counts_transcripts.results" + "default":"$id.isoforms.results" } @@ -415,10 +415,10 @@ "rsem_multiqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.rsem_multiqc.stat`. RSEM statistics", - "help_text": "Type: `file`, default: `$id.$key.rsem_multiqc.stat`. RSEM statistics" + "description": "Type: `file`, default: `$id.stat`. 
RSEM statistics", + "help_text": "Type: `file`, default: `$id.stat`. RSEM statistics" , - "default":"$id.$key.rsem_multiqc.stat" + "default":"$id.stat" } @@ -426,10 +426,10 @@ "bam_star_rsem": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bam_star_rsem.bam`. BAM file generated by STAR (optional)", - "help_text": "Type: `file`, default: `$id.$key.bam_star_rsem.bam`. BAM file generated by STAR (optional)" + "description": "Type: `file`, default: `$id.STAR.genome.bam`. BAM file generated by STAR (optional)", + "help_text": "Type: `file`, default: `$id.STAR.genome.bam`. BAM file generated by STAR (optional)" , - "default":"$id.$key.bam_star_rsem.bam" + "default":"$id.STAR.genome.bam" } @@ -437,10 +437,10 @@ "bam_genome_rsem": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bam_genome_rsem.bam`. Genome BAM file (optional)", - "help_text": "Type: `file`, default: `$id.$key.bam_genome_rsem.bam`. Genome BAM file (optional)" + "description": "Type: `file`, default: `$id.genome.bam`. Genome BAM file (optional)", + "help_text": "Type: `file`, default: `$id.genome.bam`. Genome BAM file (optional)" , - "default":"$id.$key.bam_genome_rsem.bam" + "default":"$id.genome.bam" } @@ -448,10 +448,10 @@ "bam_transcript_rsem": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bam_transcript_rsem.bam`. Transcript BAM file (optional)", - "help_text": "Type: `file`, default: `$id.$key.bam_transcript_rsem.bam`. Transcript BAM file (optional)" + "description": "Type: `file`, default: `$id.transcript.bam`. Transcript BAM file (optional)", + "help_text": "Type: `file`, default: `$id.transcript.bam`. 
Transcript BAM file (optional)" , - "default":"$id.$key.bam_transcript_rsem.bam" + "default":"$id.transcript.bam" } diff --git a/target/nextflow/workflows/merge_quant_results/.config.vsh.yaml b/target/nextflow/workflows/merge_quant_results/.config.vsh.yaml index 40a58b2..c7fbd79 100644 --- a/target/nextflow/workflows/merge_quant_results/.config.vsh.yaml +++ b/target/nextflow/workflows/merge_quant_results/.config.vsh.yaml @@ -184,6 +184,9 @@ description: "A sub-workflow to merge the counts obtained from salmon quant acro \ all samples." info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -282,9 +285,9 @@ build_info: engine: "native" output: "target/nextflow/workflows/merge_quant_results" executable: "target/nextflow/workflows/merge_quant_results/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/tx2gene" - "target/nextflow/tximport" @@ -294,7 +297,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -305,7 +308,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/workflows/merge_quant_results/main.nf b/target/nextflow/workflows/merge_quant_results/main.nf index 651372c..95a099c 100644 --- a/target/nextflow/workflows/merge_quant_results/main.nf +++ b/target/nextflow/workflows/merge_quant_results/main.nf @@ -1,6 +1,6 @@ // merge_quant_results main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a 
derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': 
required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3029,6 +3260,10 @@ meta = [ ], "description" : "A sub-workflow to merge the counts obtained from salmon quant across all samples.", "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3156,9 +3391,9 @@ meta = [ "runner" : "nextflow", "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/merge_quant_results", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3166,7 +3401,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3185,7 +3420,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/workflows/merge_quant_results/nextflow_schema.json b/target/nextflow/workflows/merge_quant_results/nextflow_schema.json index 46a1adb..437b50e 100644 --- a/target/nextflow/workflows/merge_quant_results/nextflow_schema.json +++ 
b/target/nextflow/workflows/merge_quant_results/nextflow_schema.json @@ -190,10 +190,10 @@ "quant_merged_summarizedexperiment": { "type": "string", - "description": "Type: `file`, default: `$id.$key.quant_merged_summarizedexperiment.quant_merged_summarizedexperiment`, example: `quant_merged_summarizedexperiment`. ", - "help_text": "Type: `file`, default: `$id.$key.quant_merged_summarizedexperiment.quant_merged_summarizedexperiment`, example: `quant_merged_summarizedexperiment`. " + "description": "Type: `file`, default: `$id.$key.quant_merged_summarizedexperiment`, example: `quant_merged_summarizedexperiment`. ", + "help_text": "Type: `file`, default: `$id.$key.quant_merged_summarizedexperiment`, example: `quant_merged_summarizedexperiment`. " , - "default":"$id.$key.quant_merged_summarizedexperiment.quant_merged_summarizedexperiment" + "default":"$id.$key.quant_merged_summarizedexperiment" } diff --git a/target/nextflow/workflows/post_processing/.config.vsh.yaml b/target/nextflow/workflows/post_processing/.config.vsh.yaml index ca50c5e..b06d521 100644 --- a/target/nextflow/workflows/post_processing/.config.vsh.yaml +++ b/target/nextflow/workflows/post_processing/.config.vsh.yaml @@ -350,6 +350,9 @@ description: "A viash sub-workflow for the post-processing stage of nf-core/rnas \ pipeline.\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -481,9 +484,9 @@ build_info: engine: "native" output: "target/nextflow/workflows/post_processing" executable: "target/nextflow/workflows/post_processing/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/picard_markduplicates" - 
"target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort" @@ -500,7 +503,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -511,7 +514,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/workflows/post_processing/main.nf b/target/nextflow/workflows/post_processing/main.nf index 38a7533..f370acb 100644 --- a/target/nextflow/workflows/post_processing/main.nf +++ b/target/nextflow/workflows/post_processing/main.nf @@ -1,6 +1,6 @@ // post_processing main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. 
// @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def 
items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3213,6 +3444,10 @@ meta = [ ], "description" : "A viash sub-workflow for the post-processing stage of nf-core/rnaseq pipeline.\n", "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3394,9 +3629,9 @@ meta = [ "runner" : "nextflow", "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/post_processing", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3404,7 +3639,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3423,7 +3658,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/workflows/post_processing/nextflow_schema.json b/target/nextflow/workflows/post_processing/nextflow_schema.json index e9978f5..1a482b2 100644 --- a/target/nextflow/workflows/post_processing/nextflow_schema.json +++ 
b/target/nextflow/workflows/post_processing/nextflow_schema.json @@ -222,10 +222,10 @@ "processed_genome_bam": { "type": "string", - "description": "Type: `file`, default: `$id.$key.processed_genome_bam.bam`. ", - "help_text": "Type: `file`, default: `$id.$key.processed_genome_bam.bam`. " + "description": "Type: `file`, default: `$id.genome.bam`. ", + "help_text": "Type: `file`, default: `$id.genome.bam`. " , - "default":"$id.$key.processed_genome_bam.bam" + "default":"$id.genome.bam" } @@ -233,10 +233,10 @@ "genome_bam_index": { "type": "string", - "description": "Type: `file`, default: `$id.$key.genome_bam_index.bai`. ", - "help_text": "Type: `file`, default: `$id.$key.genome_bam_index.bai`. " + "description": "Type: `file`, default: `$id.genome.bam.bai`. ", + "help_text": "Type: `file`, default: `$id.genome.bam.bai`. " , - "default":"$id.$key.genome_bam_index.bai" + "default":"$id.genome.bam.bai" } @@ -244,10 +244,10 @@ "genome_bam_stats": { "type": "string", - "description": "Type: `file`, default: `$id.$key.genome_bam_stats.stats`. ", - "help_text": "Type: `file`, default: `$id.$key.genome_bam_stats.stats`. " + "description": "Type: `file`, default: `$id.genome.stats`. ", + "help_text": "Type: `file`, default: `$id.genome.stats`. " , - "default":"$id.$key.genome_bam_stats.stats" + "default":"$id.genome.stats" } @@ -255,10 +255,10 @@ "genome_bam_flagstat": { "type": "string", - "description": "Type: `file`, default: `$id.$key.genome_bam_flagstat.flagstat`. ", - "help_text": "Type: `file`, default: `$id.$key.genome_bam_flagstat.flagstat`. " + "description": "Type: `file`, default: `$id.genome.flagstat`. ", + "help_text": "Type: `file`, default: `$id.genome.flagstat`. " , - "default":"$id.$key.genome_bam_flagstat.flagstat" + "default":"$id.genome.flagstat" } @@ -266,10 +266,10 @@ "genome_bam_idxstats": { "type": "string", - "description": "Type: `file`, default: `$id.$key.genome_bam_idxstats.idxstats`. 
", - "help_text": "Type: `file`, default: `$id.$key.genome_bam_idxstats.idxstats`. " + "description": "Type: `file`, default: `$id.genome.idxstats`. ", + "help_text": "Type: `file`, default: `$id.genome.idxstats`. " , - "default":"$id.$key.genome_bam_idxstats.idxstats" + "default":"$id.genome.idxstats" } @@ -277,10 +277,10 @@ "markduplicates_metrics": { "type": "string", - "description": "Type: `file`, default: `$id.$key.markduplicates_metrics.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.markduplicates_metrics.txt`. " + "description": "Type: `file`, default: `$id.MarkDuplicates.metrics.txt`. ", + "help_text": "Type: `file`, default: `$id.MarkDuplicates.metrics.txt`. " , - "default":"$id.$key.markduplicates_metrics.txt" + "default":"$id.MarkDuplicates.metrics.txt" } @@ -288,10 +288,10 @@ "stringtie_transcript_gtf": { "type": "string", - "description": "Type: `file`, default: `$id.$key.stringtie_transcript_gtf.gtf`. ", - "help_text": "Type: `file`, default: `$id.$key.stringtie_transcript_gtf.gtf`. " + "description": "Type: `file`, default: `$id.stringtie.transcripts.gtf`. ", + "help_text": "Type: `file`, default: `$id.stringtie.transcripts.gtf`. " , - "default":"$id.$key.stringtie_transcript_gtf.gtf" + "default":"$id.stringtie.transcripts.gtf" } @@ -299,10 +299,10 @@ "stringtie_coverage_gtf": { "type": "string", - "description": "Type: `file`, default: `$id.$key.stringtie_coverage_gtf.gtf`. ", - "help_text": "Type: `file`, default: `$id.$key.stringtie_coverage_gtf.gtf`. " + "description": "Type: `file`, default: `$id.stringtie.coverage.gtf`. ", + "help_text": "Type: `file`, default: `$id.stringtie.coverage.gtf`. " , - "default":"$id.$key.stringtie_coverage_gtf.gtf" + "default":"$id.stringtie.coverage.gtf" } @@ -310,10 +310,10 @@ "stringtie_abundance": { "type": "string", - "description": "Type: `file`, default: `$id.$key.stringtie_abundance.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.stringtie_abundance.txt`. 
" + "description": "Type: `file`, default: `$id.stringtie.gene_abundance.txt`. ", + "help_text": "Type: `file`, default: `$id.stringtie.gene_abundance.txt`. " , - "default":"$id.$key.stringtie_abundance.txt" + "default":"$id.stringtie.gene_abundance.txt" } @@ -321,10 +321,10 @@ "stringtie_ballgown": { "type": "string", - "description": "Type: `file`, default: `$id.$key.stringtie_ballgown.ballgown`. ", - "help_text": "Type: `file`, default: `$id.$key.stringtie_ballgown.ballgown`. " + "description": "Type: `file`, default: `$id.stringtie.ballgown`. ", + "help_text": "Type: `file`, default: `$id.stringtie.ballgown`. " , - "default":"$id.$key.stringtie_ballgown.ballgown" + "default":"$id.stringtie.ballgown" } @@ -332,10 +332,10 @@ "bedgraph_forward": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bedgraph_forward.bedgraph`. ", - "help_text": "Type: `file`, default: `$id.$key.bedgraph_forward.bedgraph`. " + "description": "Type: `file`, default: `$id.forward.bedgraph`. ", + "help_text": "Type: `file`, default: `$id.forward.bedgraph`. " , - "default":"$id.$key.bedgraph_forward.bedgraph" + "default":"$id.forward.bedgraph" } @@ -343,10 +343,10 @@ "bedgraph_reverse": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bedgraph_reverse.bedgraph`. ", - "help_text": "Type: `file`, default: `$id.$key.bedgraph_reverse.bedgraph`. " + "description": "Type: `file`, default: `$id.reverse.bedgraph`. ", + "help_text": "Type: `file`, default: `$id.reverse.bedgraph`. " , - "default":"$id.$key.bedgraph_reverse.bedgraph" + "default":"$id.reverse.bedgraph" } @@ -354,10 +354,10 @@ "bigwig_forward": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bigwig_forward.bigwig`. ", - "help_text": "Type: `file`, default: `$id.$key.bigwig_forward.bigwig`. " + "description": "Type: `file`, default: `$id.forward.bigwig`. ", + "help_text": "Type: `file`, default: `$id.forward.bigwig`. 
" , - "default":"$id.$key.bigwig_forward.bigwig" + "default":"$id.forward.bigwig" } @@ -365,10 +365,10 @@ "bigwig_reverse": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bigwig_reverse.bigwig`. ", - "help_text": "Type: `file`, default: `$id.$key.bigwig_reverse.bigwig`. " + "description": "Type: `file`, default: `$id.reverse.bigwig`. ", + "help_text": "Type: `file`, default: `$id.reverse.bigwig`. " , - "default":"$id.$key.bigwig_reverse.bigwig" + "default":"$id.reverse.bigwig" } diff --git a/target/nextflow/workflows/pre_processing/.config.vsh.yaml b/target/nextflow/workflows/pre_processing/.config.vsh.yaml index daeae54..a5ce52c 100644 --- a/target/nextflow/workflows/pre_processing/.config.vsh.yaml +++ b/target/nextflow/workflows/pre_processing/.config.vsh.yaml @@ -507,6 +507,9 @@ resources: description: "A subworkflow for the pre-processing stage of the nf-core/rnaseq pipeline.\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -636,9 +639,9 @@ build_info: engine: "native" output: "target/nextflow/workflows/pre_processing" executable: "target/nextflow/workflows/pre_processing/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc" - "target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract" @@ -653,7 +656,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -664,7 +667,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + 
viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/workflows/pre_processing/main.nf b/target/nextflow/workflows/pre_processing/main.nf index 916476f..989b4ad 100644 --- a/target/nextflow/workflows/pre_processing/main.nf +++ b/target/nextflow/workflows/pre_processing/main.nf @@ -1,6 +1,6 @@ // pre_processing main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 
+206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3410,6 +3641,10 @@ meta = [ ], "description" : "A subworkflow for the pre-processing stage of the nf-core/rnaseq pipeline.\n", "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3583,9 +3818,9 @@ meta = [ "runner" : "nextflow", "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/pre_processing", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3593,7 +3828,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3612,7 +3847,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/workflows/pre_processing/nextflow_schema.json b/target/nextflow/workflows/pre_processing/nextflow_schema.json index a87dd55..e92a958 100644 --- a/target/nextflow/workflows/pre_processing/nextflow_schema.json +++ 
b/target/nextflow/workflows/pre_processing/nextflow_schema.json @@ -372,10 +372,10 @@ "qc_output1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.qc_output1.gz`. Path to output directory", - "help_text": "Type: `file`, default: `$id.$key.qc_output1.gz`. Path to output directory" + "description": "Type: `file`, default: `${id}_r1.fastq.gz`. Path to output directory", + "help_text": "Type: `file`, default: `${id}_r1.fastq.gz`. Path to output directory" , - "default":"$id.$key.qc_output1.gz" + "default":"${id}_r1.fastq.gz" } @@ -383,10 +383,10 @@ "qc_output2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.qc_output2.gz`. Path to output directory", - "help_text": "Type: `file`, default: `$id.$key.qc_output2.gz`. Path to output directory" + "description": "Type: `file`, default: `${id}_r2.fastq.gz`. Path to output directory", + "help_text": "Type: `file`, default: `${id}_r2.fastq.gz`. Path to output directory" , - "default":"$id.$key.qc_output2.gz" + "default":"${id}_r2.fastq.gz" } @@ -394,10 +394,10 @@ "fastqc_html_1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fastqc_html_1.html`. FastQC HTML report for read 1", - "help_text": "Type: `file`, default: `$id.$key.fastqc_html_1.html`. FastQC HTML report for read 1." + "description": "Type: `file`, default: `${id}_r1.fastqc.html`. FastQC HTML report for read 1", + "help_text": "Type: `file`, default: `${id}_r1.fastqc.html`. FastQC HTML report for read 1." , - "default":"$id.$key.fastqc_html_1.html" + "default":"${id}_r1.fastqc.html" } @@ -405,10 +405,10 @@ "fastqc_html_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fastqc_html_2.html`. FastQC HTML report for read 2", - "help_text": "Type: `file`, default: `$id.$key.fastqc_html_2.html`. FastQC HTML report for read 2." + "description": "Type: `file`, default: `${id}_r2.fastqc.html`. 
FastQC HTML report for read 2", + "help_text": "Type: `file`, default: `${id}_r2.fastqc.html`. FastQC HTML report for read 2." , - "default":"$id.$key.fastqc_html_2.html" + "default":"${id}_r2.fastqc.html" } @@ -416,10 +416,10 @@ "fastqc_zip_1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fastqc_zip_1.zip`. FastQC report archive for read 1", - "help_text": "Type: `file`, default: `$id.$key.fastqc_zip_1.zip`. FastQC report archive for read 1." + "description": "Type: `file`, default: `${id}_r1.fastqc.zip`. FastQC report archive for read 1", + "help_text": "Type: `file`, default: `${id}_r1.fastqc.zip`. FastQC report archive for read 1." , - "default":"$id.$key.fastqc_zip_1.zip" + "default":"${id}_r1.fastqc.zip" } @@ -427,10 +427,10 @@ "fastqc_zip_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fastqc_zip_2.zip`. FastQC report archive for read 2", - "help_text": "Type: `file`, default: `$id.$key.fastqc_zip_2.zip`. FastQC report archive for read 2." + "description": "Type: `file`, default: `${id}_r2.fastqc.zip`. FastQC report archive for read 2", + "help_text": "Type: `file`, default: `${id}_r2.fastqc.zip`. FastQC report archive for read 2." , - "default":"$id.$key.fastqc_zip_2.zip" + "default":"${id}_r2.fastqc.zip" } @@ -438,10 +438,10 @@ "trim_log_1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trim_log_1.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.trim_log_1.txt`. " + "description": "Type: `file`, default: `${id}_r1.trimming_report.txt`. ", + "help_text": "Type: `file`, default: `${id}_r1.trimming_report.txt`. " , - "default":"$id.$key.trim_log_1.txt" + "default":"${id}_r1.trimming_report.txt" } @@ -449,10 +449,10 @@ "trim_log_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trim_log_2.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.trim_log_2.txt`. " + "description": "Type: `file`, default: `${id}_r2.trimming_report.txt`. 
", + "help_text": "Type: `file`, default: `${id}_r2.trimming_report.txt`. " , - "default":"$id.$key.trim_log_2.txt" + "default":"${id}_r2.trimming_report.txt" } @@ -460,10 +460,10 @@ "trim_html_1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trim_html_1.html`. ", - "help_text": "Type: `file`, default: `$id.$key.trim_html_1.html`. " + "description": "Type: `file`, default: `${id}_r1.trimmed_fastqc.html`. ", + "help_text": "Type: `file`, default: `${id}_r1.trimmed_fastqc.html`. " , - "default":"$id.$key.trim_html_1.html" + "default":"${id}_r1.trimmed_fastqc.html" } @@ -471,10 +471,10 @@ "trim_html_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trim_html_2.html`. ", - "help_text": "Type: `file`, default: `$id.$key.trim_html_2.html`. " + "description": "Type: `file`, default: `${id}_r2.trimmed_fastqc.html`. ", + "help_text": "Type: `file`, default: `${id}_r2.trimmed_fastqc.html`. " , - "default":"$id.$key.trim_html_2.html" + "default":"${id}_r2.trimmed_fastqc.html" } @@ -482,10 +482,10 @@ "trim_zip_1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trim_zip_1.zip`. ", - "help_text": "Type: `file`, default: `$id.$key.trim_zip_1.zip`. " + "description": "Type: `file`, default: `${id}_r1.trimmed_fastqc.zip`. ", + "help_text": "Type: `file`, default: `${id}_r1.trimmed_fastqc.zip`. " , - "default":"$id.$key.trim_zip_1.zip" + "default":"${id}_r1.trimmed_fastqc.zip" } @@ -493,10 +493,10 @@ "trim_zip_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trim_zip_2.zip`. ", - "help_text": "Type: `file`, default: `$id.$key.trim_zip_2.zip`. " + "description": "Type: `file`, default: `${id}_r2.trimmed_fastqc.zip`. ", + "help_text": "Type: `file`, default: `${id}_r2.trimmed_fastqc.zip`. 
" , - "default":"$id.$key.trim_zip_2.zip" + "default":"${id}_r2.trimmed_fastqc.zip" } @@ -504,10 +504,10 @@ "sortmerna_log": { "type": "string", - "description": "Type: `file`, default: `$id.$key.sortmerna_log.log`. Sortmerna log file", - "help_text": "Type: `file`, default: `$id.$key.sortmerna_log.log`. Sortmerna log file." + "description": "Type: `file`, default: `$id.sortmerna.log`. Sortmerna log file", + "help_text": "Type: `file`, default: `$id.sortmerna.log`. Sortmerna log file." , - "default":"$id.$key.sortmerna_log.log" + "default":"$id.sortmerna.log" } @@ -515,10 +515,10 @@ "salmon_quant_output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.salmon_quant_output.salmon_quant_output`. Results from Salmon quant", - "help_text": "Type: `file`, default: `$id.$key.salmon_quant_output.salmon_quant_output`. Results from Salmon quant" + "description": "Type: `file`, default: `$id.salmon_quant_output`. Results from Salmon quant", + "help_text": "Type: `file`, default: `$id.salmon_quant_output`. Results from Salmon quant" , - "default":"$id.$key.salmon_quant_output.salmon_quant_output" + "default":"$id.salmon_quant_output" } @@ -526,10 +526,10 @@ "trim_json": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trim_json.json`. The fastp json format report file name", - "help_text": "Type: `file`, default: `$id.$key.trim_json.json`. The fastp json format report file name" + "description": "Type: `file`, default: `$id.fastp_out.json`. The fastp json format report file name", + "help_text": "Type: `file`, default: `$id.fastp_out.json`. The fastp json format report file name" , - "default":"$id.$key.trim_json.json" + "default":"$id.fastp_out.json" } @@ -537,10 +537,10 @@ "trim_html": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trim_html.html`. The fastp html format report file name", - "help_text": "Type: `file`, default: `$id.$key.trim_html.html`. 
The fastp html format report file name" + "description": "Type: `file`, default: `$id.fastp_out.html`. The fastp html format report file name", + "help_text": "Type: `file`, default: `$id.fastp_out.html`. The fastp html format report file name" , - "default":"$id.$key.trim_html.html" + "default":"$id.fastp_out.html" } @@ -548,10 +548,10 @@ "merged_out": { "type": "string", - "description": "Type: `file`, default: `$id.$key.merged_out.merged_out`. File name to store merged fastp output", - "help_text": "Type: `file`, default: `$id.$key.merged_out.merged_out`. File name to store merged fastp output." + "description": "Type: `file`, default: `$id.$key.merged_out`. File name to store merged fastp output", + "help_text": "Type: `file`, default: `$id.$key.merged_out`. File name to store merged fastp output." , - "default":"$id.$key.merged_out.merged_out" + "default":"$id.$key.merged_out" } diff --git a/target/nextflow/workflows/prepare_genome/.config.vsh.yaml b/target/nextflow/workflows/prepare_genome/.config.vsh.yaml index c4d7762..4e12610 100644 --- a/target/nextflow/workflows/prepare_genome/.config.vsh.yaml +++ b/target/nextflow/workflows/prepare_genome/.config.vsh.yaml @@ -364,6 +364,9 @@ resources: description: "A subworkflow for preparing all the required genome references\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -506,9 +509,9 @@ build_info: engine: "native" output: "target/nextflow/workflows/prepare_genome" executable: "target/nextflow/workflows/prepare_genome/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/gunzip" - "target/dependencies/vsh/vsh/biobox/main/nextflow/gffread" @@ 
-528,7 +531,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -539,7 +542,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/workflows/prepare_genome/main.nf b/target/nextflow/workflows/prepare_genome/main.nf index 6804504..98535c9 100644 --- a/target/nextflow/workflows/prepare_genome/main.nf +++ b/target/nextflow/workflows/prepare_genome/main.nf @@ -1,6 +1,6 @@ // prepare_genome main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id 
'${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? 
+ publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3223,6 +3454,10 @@ meta = [ ], "description" : "A subworkflow for preparing all the required genome references\n", "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3424,9 +3659,9 @@ meta = [ "runner" : "nextflow", "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/prepare_genome", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3434,7 +3669,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3453,7 +3688,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/workflows/prepare_genome/nextflow_schema.json b/target/nextflow/workflows/prepare_genome/nextflow_schema.json index 8350750..aee6f59 100644 --- a/target/nextflow/workflows/prepare_genome/nextflow_schema.json +++ b/target/nextflow/workflows/prepare_genome/nextflow_schema.json @@ 
-255,10 +255,10 @@ "fasta_uncompressed": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fasta_uncompressed.fasta`. ", - "help_text": "Type: `file`, default: `$id.$key.fasta_uncompressed.fasta`. " + "description": "Type: `file`, default: `reference_genome.fasta`. ", + "help_text": "Type: `file`, default: `reference_genome.fasta`. " , - "default":"$id.$key.fasta_uncompressed.fasta" + "default":"reference_genome.fasta" } @@ -266,10 +266,10 @@ "gtf_uncompressed": { "type": "string", - "description": "Type: `file`, default: `$id.$key.gtf_uncompressed.gtf`. ", - "help_text": "Type: `file`, default: `$id.$key.gtf_uncompressed.gtf`. " + "description": "Type: `file`, default: `gene_annotation.gtf`. ", + "help_text": "Type: `file`, default: `gene_annotation.gtf`. " , - "default":"$id.$key.gtf_uncompressed.gtf" + "default":"gene_annotation.gtf" } @@ -277,10 +277,10 @@ "transcript_fasta_uncompressed": { "type": "string", - "description": "Type: `file`, default: `$id.$key.transcript_fasta_uncompressed.fasta`. ", - "help_text": "Type: `file`, default: `$id.$key.transcript_fasta_uncompressed.fasta`. " + "description": "Type: `file`, default: `transcriptome.fasta`. ", + "help_text": "Type: `file`, default: `transcriptome.fasta`. " , - "default":"$id.$key.transcript_fasta_uncompressed.fasta" + "default":"transcriptome.fasta" } @@ -288,10 +288,10 @@ "gene_bed_uncompressed": { "type": "string", - "description": "Type: `file`, default: `$id.$key.gene_bed_uncompressed.bed`. ", - "help_text": "Type: `file`, default: `$id.$key.gene_bed_uncompressed.bed`. " + "description": "Type: `file`, default: `gene_annotation.bed`. ", + "help_text": "Type: `file`, default: `gene_annotation.bed`. " , - "default":"$id.$key.gene_bed_uncompressed.bed" + "default":"gene_annotation.bed" } @@ -299,10 +299,10 @@ "star_index_uncompressed": { "type": "string", - "description": "Type: `file`, default: `$id.$key.star_index_uncompressed.star_index_uncompressed`. 
Path to STAR index", - "help_text": "Type: `file`, default: `$id.$key.star_index_uncompressed.star_index_uncompressed`. Path to STAR index." + "description": "Type: `file`, default: `STAR_index`. Path to STAR index", + "help_text": "Type: `file`, default: `STAR_index`. Path to STAR index." , - "default":"$id.$key.star_index_uncompressed.star_index_uncompressed" + "default":"STAR_index" } @@ -310,10 +310,10 @@ "rsem_index_uncompressed": { "type": "string", - "description": "Type: `file`, default: `$id.$key.rsem_index_uncompressed.rsem_index_uncompressed`. Path to directory or tar", - "help_text": "Type: `file`, default: `$id.$key.rsem_index_uncompressed.rsem_index_uncompressed`. Path to directory or tar.gz archive for pre-built RSEM index." + "description": "Type: `file`, default: `RSEM_index`. Path to directory or tar", + "help_text": "Type: `file`, default: `RSEM_index`. Path to directory or tar.gz archive for pre-built RSEM index." , - "default":"$id.$key.rsem_index_uncompressed.rsem_index_uncompressed" + "default":"RSEM_index" } @@ -321,10 +321,10 @@ "salmon_index_uncompressed": { "type": "string", - "description": "Type: `file`, default: `$id.$key.salmon_index_uncompressed.salmon_index_uncompressed`. Path to Salmon index", - "help_text": "Type: `file`, default: `$id.$key.salmon_index_uncompressed.salmon_index_uncompressed`. Path to Salmon index." + "description": "Type: `file`, default: `Salmon_index`. Path to Salmon index", + "help_text": "Type: `file`, default: `Salmon_index`. Path to Salmon index." , - "default":"$id.$key.salmon_index_uncompressed.salmon_index_uncompressed" + "default":"Salmon_index" } @@ -332,10 +332,10 @@ "kallisto_index_uncompressed": { "type": "string", - "description": "Type: `file`, default: `$id.$key.kallisto_index_uncompressed.kallisto_index_uncompressed`. Path to Kallisto index", - "help_text": "Type: `file`, default: `$id.$key.kallisto_index_uncompressed.kallisto_index_uncompressed`. Path to Kallisto index." 
+ "description": "Type: `file`, default: `Kallisto_index`. Path to Kallisto index", + "help_text": "Type: `file`, default: `Kallisto_index`. Path to Kallisto index." , - "default":"$id.$key.kallisto_index_uncompressed.kallisto_index_uncompressed" + "default":"Kallisto_index" } @@ -343,10 +343,10 @@ "bbsplit_index_uncompressed": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bbsplit_index_uncompressed.bbsplit_index_uncompressed`. Path to BBSplit index", - "help_text": "Type: `file`, default: `$id.$key.bbsplit_index_uncompressed.bbsplit_index_uncompressed`. Path to BBSplit index." + "description": "Type: `file`, default: `BBSplit_index`. Path to BBSplit index", + "help_text": "Type: `file`, default: `BBSplit_index`. Path to BBSplit index." , - "default":"$id.$key.bbsplit_index_uncompressed.bbsplit_index_uncompressed" + "default":"BBSplit_index" } @@ -354,10 +354,10 @@ "chrom_sizes": { "type": "string", - "description": "Type: `file`, default: `$id.$key.chrom_sizes.sizes`. File containing chromosome lengths", - "help_text": "Type: `file`, default: `$id.$key.chrom_sizes.sizes`. File containing chromosome lengths" + "description": "Type: `file`, default: `reference_genome.fasta.sizes`. File containing chromosome lengths", + "help_text": "Type: `file`, default: `reference_genome.fasta.sizes`. File containing chromosome lengths" , - "default":"$id.$key.chrom_sizes.sizes" + "default":"reference_genome.fasta.sizes" } @@ -365,10 +365,10 @@ "fai": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fai.fai`. FASTA index file", - "help_text": "Type: `file`, default: `$id.$key.fai.fai`. FASTA index file" + "description": "Type: `file`, default: `reference_genome.fasta.fai`. FASTA index file", + "help_text": "Type: `file`, default: `reference_genome.fasta.fai`. 
FASTA index file" , - "default":"$id.$key.fai.fai" + "default":"reference_genome.fasta.fai" } diff --git a/target/nextflow/workflows/pseudo_alignment_and_quant/.config.vsh.yaml b/target/nextflow/workflows/pseudo_alignment_and_quant/.config.vsh.yaml index 39b63be..87a00bf 100644 --- a/target/nextflow/workflows/pseudo_alignment_and_quant/.config.vsh.yaml +++ b/target/nextflow/workflows/pseudo_alignment_and_quant/.config.vsh.yaml @@ -190,6 +190,9 @@ description: "A viash sub-workflow for pseudo alignment and quantification stage \ nf-core/rnaseq pipeline.\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -289,9 +292,9 @@ build_info: engine: "native" output: "target/nextflow/workflows/pseudo_alignment_and_quant" executable: "target/nextflow/workflows/pseudo_alignment_and_quant/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant" - "target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant" @@ -300,7 +303,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -311,7 +314,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/workflows/pseudo_alignment_and_quant/main.nf b/target/nextflow/workflows/pseudo_alignment_and_quant/main.nf index dca97f0..63f1a4d 100644 --- a/target/nextflow/workflows/pseudo_alignment_and_quant/main.nf +++ 
b/target/nextflow/workflows/pseudo_alignment_and_quant/main.nf @@ -1,6 +1,6 @@ // pseudo_alignment_and_quant main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { 
arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -3027,6 +3258,10 @@ meta = [ ], "description" : "A viash sub-workflow for pseudo alignment and quantification stage of nf-core/rnaseq pipeline.\n", "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3152,9 +3387,9 @@ meta = [ "runner" : "nextflow", "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/pseudo_alignment_and_quant", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3162,7 +3397,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -3181,7 +3416,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_schema.json b/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_schema.json index a616836..904aed2 100644 --- 
a/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_schema.json +++ b/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_schema.json @@ -153,10 +153,10 @@ "pseudo_multiqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_multiqc.pseudo_multiqc`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_multiqc.pseudo_multiqc`. " + "description": "Type: `file`, default: `$id.$key.pseudo_multiqc`. ", + "help_text": "Type: `file`, default: `$id.$key.pseudo_multiqc`. " , - "default":"$id.$key.pseudo_multiqc.pseudo_multiqc" + "default":"$id.$key.pseudo_multiqc" } @@ -164,10 +164,10 @@ "quant_out_dir": { "type": "string", - "description": "Type: `file`, default: `$id.$key.quant_out_dir.quant`. ", - "help_text": "Type: `file`, default: `$id.$key.quant_out_dir.quant`. " + "description": "Type: `file`, default: `$id.quant`. ", + "help_text": "Type: `file`, default: `$id.quant`. " , - "default":"$id.$key.quant_out_dir.quant" + "default":"$id.quant" } @@ -175,10 +175,10 @@ "salmon_quant_results_file": { "type": "string", - "description": "Type: `file`, default: `$id.$key.salmon_quant_results_file.sf`. ", - "help_text": "Type: `file`, default: `$id.$key.salmon_quant_results_file.sf`. " + "description": "Type: `file`, default: `$id.quant.sf`. ", + "help_text": "Type: `file`, default: `$id.quant.sf`. " , - "default":"$id.$key.salmon_quant_results_file.sf" + "default":"$id.quant.sf" } @@ -186,10 +186,10 @@ "kallisto_quant_results_file": { "type": "string", - "description": "Type: `file`, default: `$id.$key.kallisto_quant_results_file.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.kallisto_quant_results_file.tsv`. " + "description": "Type: `file`, default: `$id.abundance.tsv`. ", + "help_text": "Type: `file`, default: `$id.abundance.tsv`. 
" , - "default":"$id.$key.kallisto_quant_results_file.tsv" + "default":"$id.abundance.tsv" } diff --git a/target/nextflow/workflows/quality_control/.config.vsh.yaml b/target/nextflow/workflows/quality_control/.config.vsh.yaml index 3349733..590a7cb 100644 --- a/target/nextflow/workflows/quality_control/.config.vsh.yaml +++ b/target/nextflow/workflows/quality_control/.config.vsh.yaml @@ -1405,6 +1405,9 @@ description: "A subworkflow for the final quality control stage of the nf-core/r \ pipeline.\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -1560,9 +1563,9 @@ build_info: engine: "native" output: "target/nextflow/workflows/quality_control" executable: "target/nextflow/workflows/quality_control/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat" - "target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment" @@ -1587,7 +1590,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -1598,7 +1601,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/workflows/quality_control/main.nf b/target/nextflow/workflows/quality_control/main.nf index d5d3597..22bc76d 100644 --- a/target/nextflow/workflows/quality_control/main.nf +++ b/target/nextflow/workflows/quality_control/main.nf @@ -1,6 +1,6 @@ // quality_control main // -// This wrapper script is 
auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + 
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -4406,6 +4637,10 @@ meta = [ ], "description" : "A subworkflow for the final quality control stage of the nf-core/rnaseq pipeline.\n", "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -4635,9 +4870,9 @@ meta = [ "runner" : "nextflow", "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/quality_control", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -4645,7 +4880,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -4664,7 +4899,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/workflows/quality_control/nextflow_schema.json b/target/nextflow/workflows/quality_control/nextflow_schema.json index b5e5c6a..0357263 100644 --- a/target/nextflow/workflows/quality_control/nextflow_schema.json +++ 
b/target/nextflow/workflows/quality_control/nextflow_schema.json @@ -836,10 +836,10 @@ "preseq_output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.preseq_output.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.preseq_output.txt`. " + "description": "Type: `file`, default: `$id.lc_extrap.txt`. ", + "help_text": "Type: `file`, default: `$id.lc_extrap.txt`. " , - "default":"$id.$key.preseq_output.txt" + "default":"$id.lc_extrap.txt" } @@ -847,10 +847,10 @@ "bamstat_output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bamstat_output.txt`. Path to output file (txt) of mapping quality statistics", - "help_text": "Type: `file`, default: `$id.$key.bamstat_output.txt`. Path to output file (txt) of mapping quality statistics" + "description": "Type: `file`, default: `$id.mapping_quality.txt`. Path to output file (txt) of mapping quality statistics", + "help_text": "Type: `file`, default: `$id.mapping_quality.txt`. Path to output file (txt) of mapping quality statistics" , - "default":"$id.$key.bamstat_output.txt" + "default":"$id.mapping_quality.txt" } @@ -858,10 +858,10 @@ "strandedness_output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.strandedness_output.txt`. Path to output report (txt) of inferred strandedness", - "help_text": "Type: `file`, default: `$id.$key.strandedness_output.txt`. Path to output report (txt) of inferred strandedness" + "description": "Type: `file`, default: `$id.strandedness.txt`. Path to output report (txt) of inferred strandedness", + "help_text": "Type: `file`, default: `$id.strandedness.txt`. Path to output report (txt) of inferred strandedness" , - "default":"$id.$key.strandedness_output.txt" + "default":"$id.strandedness.txt" } @@ -869,10 +869,10 @@ "inner_dist_output_stats": { "type": "string", - "description": "Type: `file`, default: `$id.$key.inner_dist_output_stats.stats`. 
output file (txt) with summary statistics of inner distances of paired reads", - "help_text": "Type: `file`, default: `$id.$key.inner_dist_output_stats.stats`. output file (txt) with summary statistics of inner distances of paired reads" + "description": "Type: `file`, default: `$id.inner_distance.stats`. output file (txt) with summary statistics of inner distances of paired reads", + "help_text": "Type: `file`, default: `$id.inner_distance.stats`. output file (txt) with summary statistics of inner distances of paired reads" , - "default":"$id.$key.inner_dist_output_stats.stats" + "default":"$id.inner_distance.stats" } @@ -880,10 +880,10 @@ "inner_dist_output_dist": { "type": "string", - "description": "Type: `file`, default: `$id.$key.inner_dist_output_dist.txt`. output file (txt) with inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.inner_dist_output_dist.txt`. output file (txt) with inner distances of all paired reads" + "description": "Type: `file`, default: `$id.inner_distance.txt`. output file (txt) with inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.inner_distance.txt`. output file (txt) with inner distances of all paired reads" , - "default":"$id.$key.inner_dist_output_dist.txt" + "default":"$id.inner_distance.txt" } @@ -891,10 +891,10 @@ "inner_dist_output_freq": { "type": "string", - "description": "Type: `file`, default: `$id.$key.inner_dist_output_freq.txt`. output file (txt) with frequencies of inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.inner_dist_output_freq.txt`. output file (txt) with frequencies of inner distances of all paired reads" + "description": "Type: `file`, default: `$id.inner_distance_freq.txt`. output file (txt) with frequencies of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.inner_distance_freq.txt`. 
output file (txt) with frequencies of inner distances of all paired reads" , - "default":"$id.$key.inner_dist_output_freq.txt" + "default":"$id.inner_distance_freq.txt" } @@ -902,10 +902,10 @@ "inner_dist_output_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.inner_dist_output_plot.pdf`. output file (pdf) with histogram plot of of inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.inner_dist_output_plot.pdf`. output file (pdf) with histogram plot of of inner distances of all paired reads" + "description": "Type: `file`, default: `$id.inner_distance_plot.pdf`. output file (pdf) with histogram plot of of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.inner_distance_plot.pdf`. output file (pdf) with histogram plot of of inner distances of all paired reads" , - "default":"$id.$key.inner_dist_output_plot.pdf" + "default":"$id.inner_distance_plot.pdf" } @@ -913,10 +913,10 @@ "inner_dist_output_plot_r": { "type": "string", - "description": "Type: `file`, default: `$id.$key.inner_dist_output_plot_r.r`. output file (R) with script of histogram plot of of inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.inner_dist_output_plot_r.r`. output file (R) with script of histogram plot of of inner distances of all paired reads" + "description": "Type: `file`, default: `$id.inner_distance_plot.r`. output file (R) with script of histogram plot of of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.inner_distance_plot.r`. output file (R) with script of histogram plot of of inner distances of all paired reads" , - "default":"$id.$key.inner_dist_output_plot_r.r" + "default":"$id.inner_distance_plot.r" } @@ -924,10 +924,10 @@ "junction_annotation_output_log": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_annotation_output_log.log`. 
output log of junction annotation script", - "help_text": "Type: `file`, default: `$id.$key.junction_annotation_output_log.log`. output log of junction annotation script" + "description": "Type: `file`, default: `$id.junction_annotation.log`. output log of junction annotation script", + "help_text": "Type: `file`, default: `$id.junction_annotation.log`. output log of junction annotation script" , - "default":"$id.$key.junction_annotation_output_log.log" + "default":"$id.junction_annotation.log" } @@ -935,10 +935,10 @@ "junction_annotation_output_plot_r": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_annotation_output_plot_r.r`. R script to generate splice_junction and splice_events plot", - "help_text": "Type: `file`, default: `$id.$key.junction_annotation_output_plot_r.r`. R script to generate splice_junction and splice_events plot" + "description": "Type: `file`, default: `$id.junction_annotation_plot.r`. R script to generate splice_junction and splice_events plot", + "help_text": "Type: `file`, default: `$id.junction_annotation_plot.r`. R script to generate splice_junction and splice_events plot" , - "default":"$id.$key.junction_annotation_output_plot_r.r" + "default":"$id.junction_annotation_plot.r" } @@ -946,10 +946,10 @@ "junction_annotation_output_junction_bed": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_annotation_output_junction_bed.bed`. junction annotation file (bed format)", - "help_text": "Type: `file`, default: `$id.$key.junction_annotation_output_junction_bed.bed`. junction annotation file (bed format)" + "description": "Type: `file`, default: `$id.junction_annotation.bed`. junction annotation file (bed format)", + "help_text": "Type: `file`, default: `$id.junction_annotation.bed`. 
junction annotation file (bed format)" , - "default":"$id.$key.junction_annotation_output_junction_bed.bed" + "default":"$id.junction_annotation.bed" } @@ -957,10 +957,10 @@ "junction_annotation_output_junction_interact": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_annotation_output_junction_interact.bed`. interact file (bed format) of junctions", - "help_text": "Type: `file`, default: `$id.$key.junction_annotation_output_junction_interact.bed`. interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization." + "description": "Type: `file`, default: `$id.junction_annotation.Interact.bed`. interact file (bed format) of junctions", + "help_text": "Type: `file`, default: `$id.junction_annotation.Interact.bed`. interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization." , - "default":"$id.$key.junction_annotation_output_junction_interact.bed" + "default":"$id.junction_annotation.Interact.bed" } @@ -968,10 +968,10 @@ "junction_annotation_output_junction_sheet": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_annotation_output_junction_sheet.xls`. junction annotation file (xls format)", - "help_text": "Type: `file`, default: `$id.$key.junction_annotation_output_junction_sheet.xls`. junction annotation file (xls format)" + "description": "Type: `file`, default: `$id.junction_annotation.xls`. junction annotation file (xls format)", + "help_text": "Type: `file`, default: `$id.junction_annotation.xls`. 
junction annotation file (xls format)" , - "default":"$id.$key.junction_annotation_output_junction_sheet.xls" + "default":"$id.junction_annotation.xls" } @@ -979,10 +979,10 @@ "junction_annotation_output_splice_events_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_annotation_output_splice_events_plot.pdf`. plot of splice events (pdf)", - "help_text": "Type: `file`, default: `$id.$key.junction_annotation_output_splice_events_plot.pdf`. plot of splice events (pdf)" + "description": "Type: `file`, default: `$id.splice_events.pdf`. plot of splice events (pdf)", + "help_text": "Type: `file`, default: `$id.splice_events.pdf`. plot of splice events (pdf)" , - "default":"$id.$key.junction_annotation_output_splice_events_plot.pdf" + "default":"$id.splice_events.pdf" } @@ -990,10 +990,10 @@ "junction_annotation_output_splice_junctions_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_annotation_output_splice_junctions_plot.pdf`. plot of junctions (pdf)", - "help_text": "Type: `file`, default: `$id.$key.junction_annotation_output_splice_junctions_plot.pdf`. plot of junctions (pdf)" + "description": "Type: `file`, default: `$id.splice_junctions_plot.pdf`. plot of junctions (pdf)", + "help_text": "Type: `file`, default: `$id.splice_junctions_plot.pdf`. plot of junctions (pdf)" , - "default":"$id.$key.junction_annotation_output_splice_junctions_plot.pdf" + "default":"$id.splice_junctions_plot.pdf" } @@ -1001,10 +1001,10 @@ "junction_saturation_output_plot_r": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_saturation_output_plot_r.r`. r script to generate junction_saturation_plot plot", - "help_text": "Type: `file`, default: `$id.$key.junction_saturation_output_plot_r.r`. r script to generate junction_saturation_plot plot" + "description": "Type: `file`, default: `$id.junction_saturation_plot.r`. 
r script to generate junction_saturation_plot plot", + "help_text": "Type: `file`, default: `$id.junction_saturation_plot.r`. r script to generate junction_saturation_plot plot" , - "default":"$id.$key.junction_saturation_output_plot_r.r" + "default":"$id.junction_saturation_plot.r" } @@ -1012,10 +1012,10 @@ "junction_saturation_output_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_saturation_output_plot.pdf`. plot of junction saturation (pdf", - "help_text": "Type: `file`, default: `$id.$key.junction_saturation_output_plot.pdf`. plot of junction saturation (pdf" + "description": "Type: `file`, default: `$id.junction_saturation_plot.pdf`. plot of junction saturation (pdf", + "help_text": "Type: `file`, default: `$id.junction_saturation_plot.pdf`. plot of junction saturation (pdf" , - "default":"$id.$key.junction_saturation_output_plot.pdf" + "default":"$id.junction_saturation_plot.pdf" } @@ -1023,10 +1023,10 @@ "read_distribution_output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.read_distribution_output.txt`. output file (txt) of read distribution analysis", - "help_text": "Type: `file`, default: `$id.$key.read_distribution_output.txt`. output file (txt) of read distribution analysis." + "description": "Type: `file`, default: `$id.read_distribution.txt`. output file (txt) of read distribution analysis", + "help_text": "Type: `file`, default: `$id.read_distribution.txt`. output file (txt) of read distribution analysis." , - "default":"$id.$key.read_distribution_output.txt" + "default":"$id.read_distribution.txt" } @@ -1034,10 +1034,10 @@ "read_duplication_output_duplication_rate_plot_r": { "type": "string", - "description": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_plot_r.r`. R script for generating duplication rate plot", - "help_text": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_plot_r.r`. 
R script for generating duplication rate plot" + "description": "Type: `file`, default: `$id.duplication_rate_plot.r`. R script for generating duplication rate plot", + "help_text": "Type: `file`, default: `$id.duplication_rate_plot.r`. R script for generating duplication rate plot" , - "default":"$id.$key.read_duplication_output_duplication_rate_plot_r.r" + "default":"$id.duplication_rate_plot.r" } @@ -1045,10 +1045,10 @@ "read_duplication_output_duplication_rate_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_plot.pdf`. duplication rate plot (pdf)", - "help_text": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_plot.pdf`. duplication rate plot (pdf)" + "description": "Type: `file`, default: `$id.duplication_rate_plot.pdf`. duplication rate plot (pdf)", + "help_text": "Type: `file`, default: `$id.duplication_rate_plot.pdf`. duplication rate plot (pdf)" , - "default":"$id.$key.read_duplication_output_duplication_rate_plot.pdf" + "default":"$id.duplication_rate_plot.pdf" } @@ -1056,10 +1056,10 @@ "read_duplication_output_duplication_rate_mapping": { "type": "string", - "description": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_mapping.xls`. Summary of mapping-based read duplication", - "help_text": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_mapping.xls`. Summary of mapping-based read duplication" + "description": "Type: `file`, default: `$id.duplication_rate_mapping.xls`. Summary of mapping-based read duplication", + "help_text": "Type: `file`, default: `$id.duplication_rate_mapping.xls`. 
Summary of mapping-based read duplication" , - "default":"$id.$key.read_duplication_output_duplication_rate_mapping.xls" + "default":"$id.duplication_rate_mapping.xls" } @@ -1067,10 +1067,10 @@ "read_duplication_output_duplication_rate_sequence": { "type": "string", - "description": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_sequence.xls`. Summary of sequencing-based read duplication", - "help_text": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_sequence.xls`. Summary of sequencing-based read duplication" + "description": "Type: `file`, default: `$id.duplication_rate_sequencing.xls`. Summary of sequencing-based read duplication", + "help_text": "Type: `file`, default: `$id.duplication_rate_sequencing.xls`. Summary of sequencing-based read duplication" , - "default":"$id.$key.read_duplication_output_duplication_rate_sequence.xls" + "default":"$id.duplication_rate_sequencing.xls" } @@ -1078,10 +1078,10 @@ "tin_output_summary": { "type": "string", - "description": "Type: `file`, default: `$id.$key.tin_output_summary.txt`. summary statistics (txt) of calculated TIN metrics", - "help_text": "Type: `file`, default: `$id.$key.tin_output_summary.txt`. summary statistics (txt) of calculated TIN metrics" + "description": "Type: `file`, default: `$id.tin_summary.txt`. summary statistics (txt) of calculated TIN metrics", + "help_text": "Type: `file`, default: `$id.tin_summary.txt`. summary statistics (txt) of calculated TIN metrics" , - "default":"$id.$key.tin_output_summary.txt" + "default":"$id.tin_summary.txt" } @@ -1089,10 +1089,10 @@ "tin_output_metrics": { "type": "string", - "description": "Type: `file`, default: `$id.$key.tin_output_metrics.xls`. file with TIN metrics (xls)", - "help_text": "Type: `file`, default: `$id.$key.tin_output_metrics.xls`. file with TIN metrics (xls)" + "description": "Type: `file`, default: `$id.tin.xls`. 
file with TIN metrics (xls)", + "help_text": "Type: `file`, default: `$id.tin.xls`. file with TIN metrics (xls)" , - "default":"$id.$key.tin_output_metrics.xls" + "default":"$id.tin.xls" } @@ -1100,10 +1100,10 @@ "dupradar_output_dupmatrix": { "type": "string", - "description": "Type: `file`, default: `$id.$key.dupradar_output_dupmatrix.txt`. path to output file (txt) of duplicate tag counts", - "help_text": "Type: `file`, default: `$id.$key.dupradar_output_dupmatrix.txt`. path to output file (txt) of duplicate tag counts" + "description": "Type: `file`, default: `$id.dup_matrix.txt`. path to output file (txt) of duplicate tag counts", + "help_text": "Type: `file`, default: `$id.dup_matrix.txt`. path to output file (txt) of duplicate tag counts" , - "default":"$id.$key.dupradar_output_dupmatrix.txt" + "default":"$id.dup_matrix.txt" } @@ -1111,10 +1111,10 @@ "dupradar_output_dup_intercept_mqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.dupradar_output_dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar", - "help_text": "Type: `file`, default: `$id.$key.dupradar_output_dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar" + "description": "Type: `file`, default: `$id.dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar", + "help_text": "Type: `file`, default: `$id.dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar" , - "default":"$id.$key.dupradar_output_dup_intercept_mqc.txt" + "default":"$id.dup_intercept_mqc.txt" } @@ -1122,10 +1122,10 @@ "dupradar_output_duprate_exp_boxplot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.dupradar_output_duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot", - "help_text": "Type: `file`, default: `$id.$key.dupradar_output_duprate_exp_boxplot.pdf`. 
path to output file (pdf) of distribution of expression box plot" + "description": "Type: `file`, default: `$id.duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot", + "help_text": "Type: `file`, default: `$id.duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot" , - "default":"$id.$key.dupradar_output_duprate_exp_boxplot.pdf" + "default":"$id.duprate_exp_boxplot.pdf" } @@ -1133,10 +1133,10 @@ "dupradar_output_duprate_exp_densplot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.dupradar_output_duprate_exp_densplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts", - "help_text": "Type: `file`, default: `$id.$key.dupradar_output_duprate_exp_densplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts" + "description": "Type: `file`, default: `$id.duprate_exp_densityplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts", + "help_text": "Type: `file`, default: `$id.duprate_exp_densityplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts" , - "default":"$id.$key.dupradar_output_duprate_exp_densplot.pdf" + "default":"$id.duprate_exp_densityplot.pdf" } @@ -1144,10 +1144,10 @@ "dupradar_output_duprate_exp_denscurve_mqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.dupradar_output_duprate_exp_denscurve_mqc.pdf`. path to output file (pdf) of density curve of gene duplication multiqc", - "help_text": "Type: `file`, default: `$id.$key.dupradar_output_duprate_exp_denscurve_mqc.pdf`. path to output file (pdf) of density curve of gene duplication multiqc" + "description": "Type: `file`, default: `$id.duprate_exp_density_curve_mqc.pdf`. path to output file (pdf) of density curve of gene duplication multiqc", + "help_text": "Type: `file`, default: `$id.duprate_exp_density_curve_mqc.pdf`. 
path to output file (pdf) of density curve of gene duplication multiqc" , - "default":"$id.$key.dupradar_output_duprate_exp_denscurve_mqc.pdf" + "default":"$id.duprate_exp_density_curve_mqc.pdf" } @@ -1155,10 +1155,10 @@ "dupradar_output_expression_histogram": { "type": "string", - "description": "Type: `file`, default: `$id.$key.dupradar_output_expression_histogram.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram", - "help_text": "Type: `file`, default: `$id.$key.dupradar_output_expression_histogram.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram" + "description": "Type: `file`, default: `$id.expression_hist.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram", + "help_text": "Type: `file`, default: `$id.expression_hist.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram" , - "default":"$id.$key.dupradar_output_expression_histogram.pdf" + "default":"$id.expression_hist.pdf" } @@ -1166,10 +1166,10 @@ "dupradar_output_intercept_slope": { "type": "string", - "description": "Type: `file`, default: `$id.$key.dupradar_output_intercept_slope.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.dupradar_output_intercept_slope.txt`. " + "description": "Type: `file`, default: `$id.intercept_slope.txt`. ", + "help_text": "Type: `file`, default: `$id.intercept_slope.txt`. " , - "default":"$id.$key.dupradar_output_intercept_slope.txt" + "default":"$id.intercept_slope.txt" } @@ -1188,10 +1188,10 @@ "qualimap_counts": { "type": "string", - "description": "Type: `file`, default: `$id.$key.qualimap_counts.qualimap_counts`. Output file for computed counts", - "help_text": "Type: `file`, default: `$id.$key.qualimap_counts.qualimap_counts`. Output file for computed counts." + "description": "Type: `file`, default: `$id.$key.qualimap_counts`. Output file for computed counts", + "help_text": "Type: `file`, default: `$id.$key.qualimap_counts`. 
Output file for computed counts." , - "default":"$id.$key.qualimap_counts.qualimap_counts" + "default":"$id.$key.qualimap_counts" } @@ -1210,10 +1210,10 @@ "deseq2_output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.deseq2_output.deseq2_output`. ", - "help_text": "Type: `file`, default: `$id.$key.deseq2_output.deseq2_output`. " + "description": "Type: `file`, default: `deseq2`. ", + "help_text": "Type: `file`, default: `deseq2`. " , - "default":"$id.$key.deseq2_output.deseq2_output" + "default":"deseq2" } @@ -1221,10 +1221,10 @@ "deseq2_output_pseudo": { "type": "string", - "description": "Type: `file`, default: `$id.$key.deseq2_output_pseudo.deseq2_output_pseudo`. ", - "help_text": "Type: `file`, default: `$id.$key.deseq2_output_pseudo.deseq2_output_pseudo`. " + "description": "Type: `file`, default: `deseq2_pseudo`. ", + "help_text": "Type: `file`, default: `deseq2_pseudo`. " , - "default":"$id.$key.deseq2_output_pseudo.deseq2_output_pseudo" + "default":"deseq2_pseudo" } @@ -1232,10 +1232,10 @@ "multiqc_report": { "type": "string", - "description": "Type: `file`, default: `$id.$key.multiqc_report.html`. ", - "help_text": "Type: `file`, default: `$id.$key.multiqc_report.html`. " + "description": "Type: `file`, default: `multiqc_report.html`. ", + "help_text": "Type: `file`, default: `multiqc_report.html`. " , - "default":"$id.$key.multiqc_report.html" + "default":"multiqc_report.html" } @@ -1243,10 +1243,10 @@ "multiqc_data": { "type": "string", - "description": "Type: `file`, default: `$id.$key.multiqc_data.multiqc_data`. ", - "help_text": "Type: `file`, default: `$id.$key.multiqc_data.multiqc_data`. " + "description": "Type: `file`, default: `multiqc_data`. ", + "help_text": "Type: `file`, default: `multiqc_data`. " , - "default":"$id.$key.multiqc_data.multiqc_data" + "default":"multiqc_data" } @@ -1254,10 +1254,10 @@ "multiqc_plots": { "type": "string", - "description": "Type: `file`, default: `$id.$key.multiqc_plots.multiqc_plots`. 
", - "help_text": "Type: `file`, default: `$id.$key.multiqc_plots.multiqc_plots`. " + "description": "Type: `file`, default: `multiqc_plots`. ", + "help_text": "Type: `file`, default: `multiqc_plots`. " , - "default":"$id.$key.multiqc_plots.multiqc_plots" + "default":"multiqc_plots" } @@ -1265,10 +1265,10 @@ "featurecounts": { "type": "string", - "description": "Type: `file`, default: `$id.$key.featurecounts.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.featurecounts.txt`. " + "description": "Type: `file`, default: `$id.featureCounts.txt`. ", + "help_text": "Type: `file`, default: `$id.featureCounts.txt`. " , - "default":"$id.$key.featurecounts.txt" + "default":"$id.featureCounts.txt" } @@ -1276,10 +1276,10 @@ "featurecounts_summary": { "type": "string", - "description": "Type: `file`, default: `$id.$key.featurecounts_summary.summary`. ", - "help_text": "Type: `file`, default: `$id.$key.featurecounts_summary.summary`. " + "description": "Type: `file`, default: `$id.featureCounts.txt.summary`. ", + "help_text": "Type: `file`, default: `$id.featureCounts.txt.summary`. " , - "default":"$id.$key.featurecounts_summary.summary" + "default":"$id.featureCounts.txt.summary" } @@ -1287,10 +1287,10 @@ "featurecounts_multiqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.featurecounts_multiqc.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.featurecounts_multiqc.tsv`. " + "description": "Type: `file`, default: `$id.featureCounts_mqc.tsv`. ", + "help_text": "Type: `file`, default: `$id.featureCounts_mqc.tsv`. " , - "default":"$id.$key.featurecounts_multiqc.tsv" + "default":"$id.featureCounts_mqc.tsv" } @@ -1298,10 +1298,10 @@ "featurecounts_rrna_multiqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.featurecounts_rrna_multiqc.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.featurecounts_rrna_multiqc.tsv`. " + "description": "Type: `file`, default: `$id.featureCounts_rrna_mqc.tsv`. 
", + "help_text": "Type: `file`, default: `$id.featureCounts_rrna_mqc.tsv`. " , - "default":"$id.$key.featurecounts_rrna_multiqc.tsv" + "default":"$id.featureCounts_rrna_mqc.tsv" } @@ -1309,10 +1309,10 @@ "tpm_gene": { "type": "string", - "description": "Type: `file`, default: `$id.$key.tpm_gene.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.tpm_gene.tsv`. " + "description": "Type: `file`, default: `salmon.merged.gene_tpm.tsv`. ", + "help_text": "Type: `file`, default: `salmon.merged.gene_tpm.tsv`. " , - "default":"$id.$key.tpm_gene.tsv" + "default":"salmon.merged.gene_tpm.tsv" } @@ -1320,10 +1320,10 @@ "counts_gene": { "type": "string", - "description": "Type: `file`, default: `$id.$key.counts_gene.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.counts_gene.tsv`. " + "description": "Type: `file`, default: `salmon.merged.gene_counts.tsv`. ", + "help_text": "Type: `file`, default: `salmon.merged.gene_counts.tsv`. " , - "default":"$id.$key.counts_gene.tsv" + "default":"salmon.merged.gene_counts.tsv" } @@ -1331,10 +1331,10 @@ "counts_gene_length_scaled": { "type": "string", - "description": "Type: `file`, default: `$id.$key.counts_gene_length_scaled.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.counts_gene_length_scaled.tsv`. " + "description": "Type: `file`, default: `salmon.merged.gene_counts_length_scaled.tsv`. ", + "help_text": "Type: `file`, default: `salmon.merged.gene_counts_length_scaled.tsv`. " , - "default":"$id.$key.counts_gene_length_scaled.tsv" + "default":"salmon.merged.gene_counts_length_scaled.tsv" } @@ -1342,10 +1342,10 @@ "counts_gene_scaled": { "type": "string", - "description": "Type: `file`, default: `$id.$key.counts_gene_scaled.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.counts_gene_scaled.tsv`. " + "description": "Type: `file`, default: `salmon.merged.gene_counts_scaled.tsv`. ", + "help_text": "Type: `file`, default: `salmon.merged.gene_counts_scaled.tsv`. 
" , - "default":"$id.$key.counts_gene_scaled.tsv" + "default":"salmon.merged.gene_counts_scaled.tsv" } @@ -1353,10 +1353,10 @@ "tpm_transcript": { "type": "string", - "description": "Type: `file`, default: `$id.$key.tpm_transcript.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.tpm_transcript.tsv`. " + "description": "Type: `file`, default: `salmon.merged.transcript_tpm.tsv`. ", + "help_text": "Type: `file`, default: `salmon.merged.transcript_tpm.tsv`. " , - "default":"$id.$key.tpm_transcript.tsv" + "default":"salmon.merged.transcript_tpm.tsv" } @@ -1364,10 +1364,10 @@ "counts_transcript": { "type": "string", - "description": "Type: `file`, default: `$id.$key.counts_transcript.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.counts_transcript.tsv`. " + "description": "Type: `file`, default: `salmon.merged.transcript_counts.tsv`. ", + "help_text": "Type: `file`, default: `salmon.merged.transcript_counts.tsv`. " , - "default":"$id.$key.counts_transcript.tsv" + "default":"salmon.merged.transcript_counts.tsv" } @@ -1375,10 +1375,10 @@ "quant_merged_summarizedexperiment": { "type": "string", - "description": "Type: `file`, default: `$id.$key.quant_merged_summarizedexperiment.quant_merged_summarizedexperiment`. ", - "help_text": "Type: `file`, default: `$id.$key.quant_merged_summarizedexperiment.quant_merged_summarizedexperiment`. " + "description": "Type: `file`, default: `salmon_merged_summarizedexperiment`. ", + "help_text": "Type: `file`, default: `salmon_merged_summarizedexperiment`. " , - "default":"$id.$key.quant_merged_summarizedexperiment.quant_merged_summarizedexperiment" + "default":"salmon_merged_summarizedexperiment" } @@ -1386,10 +1386,10 @@ "pseudo_tpm_gene": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_tpm_gene.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_tpm_gene.tsv`. " + "description": "Type: `file`, default: `pseudo_gene_tpm.tsv`. 
", + "help_text": "Type: `file`, default: `pseudo_gene_tpm.tsv`. " , - "default":"$id.$key.pseudo_tpm_gene.tsv" + "default":"pseudo_gene_tpm.tsv" } @@ -1397,10 +1397,10 @@ "pseudo_counts_gene": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_counts_gene.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_counts_gene.tsv`. " + "description": "Type: `file`, default: `pseudo_gene_counts.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_gene_counts.tsv`. " , - "default":"$id.$key.pseudo_counts_gene.tsv" + "default":"pseudo_gene_counts.tsv" } @@ -1408,10 +1408,10 @@ "pseudo_counts_gene_length_scaled": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_counts_gene_length_scaled.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_counts_gene_length_scaled.tsv`. " + "description": "Type: `file`, default: `pseudo_gene_counts_length_scaled.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_gene_counts_length_scaled.tsv`. " , - "default":"$id.$key.pseudo_counts_gene_length_scaled.tsv" + "default":"pseudo_gene_counts_length_scaled.tsv" } @@ -1419,10 +1419,10 @@ "pseudo_counts_gene_scaled": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_counts_gene_scaled.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_counts_gene_scaled.tsv`. " + "description": "Type: `file`, default: `pseudo_gene_counts_scaled.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_gene_counts_scaled.tsv`. " , - "default":"$id.$key.pseudo_counts_gene_scaled.tsv" + "default":"pseudo_gene_counts_scaled.tsv" } @@ -1430,10 +1430,10 @@ "pseudo_tpm_transcript": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_tpm_transcript.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_tpm_transcript.tsv`. " + "description": "Type: `file`, default: `pseudo_transcript_tpm.tsv`. 
", + "help_text": "Type: `file`, default: `pseudo_transcript_tpm.tsv`. " , - "default":"$id.$key.pseudo_tpm_transcript.tsv" + "default":"pseudo_transcript_tpm.tsv" } @@ -1441,10 +1441,10 @@ "pseudo_counts_transcript": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_counts_transcript.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_counts_transcript.tsv`. " + "description": "Type: `file`, default: `pseudo_transcript_counts.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_transcript_counts.tsv`. " , - "default":"$id.$key.pseudo_counts_transcript.tsv" + "default":"pseudo_transcript_counts.tsv" } @@ -1452,10 +1452,10 @@ "pseudo_quant_merged_summarizedexperiment": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_quant_merged_summarizedexperiment.pseudo_quant_merged_summarizedexperiment`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_quant_merged_summarizedexperiment.pseudo_quant_merged_summarizedexperiment`. " + "description": "Type: `file`, default: `pseudo_quant_merged_summarizedexperiment`. ", + "help_text": "Type: `file`, default: `pseudo_quant_merged_summarizedexperiment`. 
" , - "default":"$id.$key.pseudo_quant_merged_summarizedexperiment.pseudo_quant_merged_summarizedexperiment" + "default":"pseudo_quant_merged_summarizedexperiment" } diff --git a/target/nextflow/workflows/rnaseq/.config.vsh.yaml b/target/nextflow/workflows/rnaseq/.config.vsh.yaml index 0787eab..ab27d3c 100644 --- a/target/nextflow/workflows/rnaseq/.config.vsh.yaml +++ b/target/nextflow/workflows/rnaseq/.config.vsh.yaml @@ -1948,6 +1948,9 @@ test_resources: entrypoint: "test_wf" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -2058,9 +2061,9 @@ build_info: engine: "native" output: "target/nextflow/workflows/rnaseq" executable: "target/nextflow/workflows/rnaseq/main.nf" - viash_version: "0.9.0" - git_commit: "2b3d511b34246648b934fd1dc99b22e0a71c37f2" - git_remote: "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + viash_version: "0.9.2" + git_commit: "f52978a0e25cae182b7874b4b8aa3afc183e880e" + git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/workflows/prepare_genome" - "target/nextflow/cat_fastq" @@ -2074,7 +2077,7 @@ package_config: version: "main" info: test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" + - path: "gs://viash-hub-resources/rnaseq/v1" dest: "testData" repositories: - type: "vsh" @@ -2085,7 +2088,7 @@ package_config: name: "craftbox" repo: "craftbox" tag: "v0.1.0" - viash_version: "0.9.0" + viash_version: "0.9.2" source: "src" target: "target" config_mods: diff --git a/target/nextflow/workflows/rnaseq/main.nf b/target/nextflow/workflows/rnaseq/main.nf index 6af2502..46d9208 100644 --- a/target/nextflow/workflows/rnaseq/main.nf +++ b/target/nextflow/workflows/rnaseq/main.nf @@ -1,6 +1,6 @@ // rnaseq main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative // work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -5075,6 +5306,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -5226,9 +5461,9 @@ meta = [ "runner" : "nextflow", "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/rnaseq", - "viash_version" : "0.9.0", - "git_commit" : "2b3d511b34246648b934fd1dc99b22e0a71c37f2", - "git_remote" : "https://x-access-token:ghs_mpjpezoXRPhvITbEzvFPaxzcp8yfML2ITu9P@github.com/viash-hub/rnaseq" + "viash_version" : "0.9.2", + "git_commit" : "f52978a0e25cae182b7874b4b8aa3afc183e880e", + "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -5236,7 +5471,7 @@ meta = [ "info" : { "test_resources" : [ { - "path" : "gs://viash-hub-test-data/rnaseq/v1", + "path" : "gs://viash-hub-resources/rnaseq/v1", "dest" : "testData" } ] @@ -5255,7 +5490,7 @@ meta = [ "tag" : "v0.1.0" } ], - "viash_version" : "0.9.0", + "viash_version" : "0.9.2", "source" : "/workdir/root/repo/src", "target" : "/workdir/root/repo/target", "config_mods" : [ diff --git a/target/nextflow/workflows/rnaseq/nextflow_schema.json b/target/nextflow/workflows/rnaseq/nextflow_schema.json index f0c8300..6276246 100644 --- a/target/nextflow/workflows/rnaseq/nextflow_schema.json +++ b/target/nextflow/workflows/rnaseq/nextflow_schema.json @@ -855,10 +855,10 @@ "output_fasta": { "type": "string", - "description": "Type: `file`, default: 
`$id.$key.output_fasta.fasta`. ", - "help_text": "Type: `file`, default: `$id.$key.output_fasta.fasta`. " + "description": "Type: `file`, default: `reference/genome.fasta`. ", + "help_text": "Type: `file`, default: `reference/genome.fasta`. " , - "default":"$id.$key.output_fasta.fasta" + "default":"reference/genome.fasta" } @@ -866,10 +866,10 @@ "output_gtf": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_gtf.gtf`. ", - "help_text": "Type: `file`, default: `$id.$key.output_gtf.gtf`. " + "description": "Type: `file`, default: `reference/gene_annotation.gtf`. ", + "help_text": "Type: `file`, default: `reference/gene_annotation.gtf`. " , - "default":"$id.$key.output_gtf.gtf" + "default":"reference/gene_annotation.gtf" } @@ -877,10 +877,10 @@ "output_transcript_fasta": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_transcript_fasta.fasta`. ", - "help_text": "Type: `file`, default: `$id.$key.output_transcript_fasta.fasta`. " + "description": "Type: `file`, default: `reference/transcriptome.fasta`. ", + "help_text": "Type: `file`, default: `reference/transcriptome.fasta`. " , - "default":"$id.$key.output_transcript_fasta.fasta" + "default":"reference/transcriptome.fasta" } @@ -888,10 +888,10 @@ "output_gene_bed": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_gene_bed.bed`. ", - "help_text": "Type: `file`, default: `$id.$key.output_gene_bed.bed`. " + "description": "Type: `file`, default: `reference/gene_annotation.bed`. ", + "help_text": "Type: `file`, default: `reference/gene_annotation.bed`. " , - "default":"$id.$key.output_gene_bed.bed" + "default":"reference/gene_annotation.bed" } @@ -899,10 +899,10 @@ "output_star_index": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_star_index.output_star_index`. Path to STAR index", - "help_text": "Type: `file`, default: `$id.$key.output_star_index.output_star_index`. Path to STAR index." 
+ "description": "Type: `file`, default: `reference/index/STAR`. Path to STAR index", + "help_text": "Type: `file`, default: `reference/index/STAR`. Path to STAR index." , - "default":"$id.$key.output_star_index.output_star_index" + "default":"reference/index/STAR" } @@ -910,10 +910,10 @@ "output_salmon_index": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_salmon_index.output_salmon_index`. Path to Salmon index", - "help_text": "Type: `file`, default: `$id.$key.output_salmon_index.output_salmon_index`. Path to Salmon index." + "description": "Type: `file`, default: `reference/index/Salmon`. Path to Salmon index", + "help_text": "Type: `file`, default: `reference/index/Salmon`. Path to Salmon index." , - "default":"$id.$key.output_salmon_index.output_salmon_index" + "default":"reference/index/Salmon" } @@ -921,10 +921,10 @@ "output_bbsplit_index": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_bbsplit_index.output_bbsplit_index`. Path to BBSplit index", - "help_text": "Type: `file`, default: `$id.$key.output_bbsplit_index.output_bbsplit_index`. Path to BBSplit index." + "description": "Type: `file`, default: `reference/index/BBSplit`. Path to BBSplit index", + "help_text": "Type: `file`, default: `reference/index/BBSplit`. Path to BBSplit index." , - "default":"$id.$key.output_bbsplit_index.output_bbsplit_index" + "default":"reference/index/BBSplit" } @@ -932,10 +932,10 @@ "output_kallisto_index": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_kallisto_index.output_kallisto_index`. Path to Kallisto index", - "help_text": "Type: `file`, default: `$id.$key.output_kallisto_index.output_kallisto_index`. Path to Kallisto index." + "description": "Type: `file`, default: `reference/index/Kallisto`. Path to Kallisto index", + "help_text": "Type: `file`, default: `reference/index/Kallisto`. Path to Kallisto index." 
, - "default":"$id.$key.output_kallisto_index.output_kallisto_index" + "default":"reference/index/Kallisto" } @@ -943,10 +943,10 @@ "output_fastq_1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_fastq_1.gz`. Path to output directory", - "help_text": "Type: `file`, default: `$id.$key.output_fastq_1.gz`. Path to output directory" + "description": "Type: `file`, default: `fastq/${id}_r1.fastq.gz`. Path to output directory", + "help_text": "Type: `file`, default: `fastq/${id}_r1.fastq.gz`. Path to output directory" , - "default":"$id.$key.output_fastq_1.gz" + "default":"fastq/${id}_r1.fastq.gz" } @@ -954,10 +954,10 @@ "output_fastq_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_fastq_2.gz`. Path to output directory", - "help_text": "Type: `file`, default: `$id.$key.output_fastq_2.gz`. Path to output directory" + "description": "Type: `file`, default: `fastq/${id}_r2.fastq.gz`. Path to output directory", + "help_text": "Type: `file`, default: `fastq/${id}_r2.fastq.gz`. Path to output directory" , - "default":"$id.$key.output_fastq_2.gz" + "default":"fastq/${id}_r2.fastq.gz" } @@ -965,10 +965,10 @@ "fastqc_html_1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fastqc_html_1.html`. FastQC HTML report for read 1", - "help_text": "Type: `file`, default: `$id.$key.fastqc_html_1.html`. FastQC HTML report for read 1." + "description": "Type: `file`, default: `fastqc_raw/${id}_r1.fastqc.html`. FastQC HTML report for read 1", + "help_text": "Type: `file`, default: `fastqc_raw/${id}_r1.fastqc.html`. FastQC HTML report for read 1." , - "default":"$id.$key.fastqc_html_1.html" + "default":"fastqc_raw/${id}_r1.fastqc.html" } @@ -976,10 +976,10 @@ "fastqc_html_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fastqc_html_2.html`. FastQC HTML report for read 2", - "help_text": "Type: `file`, default: `$id.$key.fastqc_html_2.html`. FastQC HTML report for read 2." 
+ "description": "Type: `file`, default: `fastqc_raw/${id}_r2.fastqc.html`. FastQC HTML report for read 2", + "help_text": "Type: `file`, default: `fastqc_raw/${id}_r2.fastqc.html`. FastQC HTML report for read 2." , - "default":"$id.$key.fastqc_html_2.html" + "default":"fastqc_raw/${id}_r2.fastqc.html" } @@ -987,10 +987,10 @@ "fastqc_zip_1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fastqc_zip_1.zip`. FastQC report archive for read 1", - "help_text": "Type: `file`, default: `$id.$key.fastqc_zip_1.zip`. FastQC report archive for read 1." + "description": "Type: `file`, default: `fastqc_raw/${id}_r1.fastqc.zip`. FastQC report archive for read 1", + "help_text": "Type: `file`, default: `fastqc_raw/${id}_r1.fastqc.zip`. FastQC report archive for read 1." , - "default":"$id.$key.fastqc_zip_1.zip" + "default":"fastqc_raw/${id}_r1.fastqc.zip" } @@ -998,10 +998,10 @@ "fastqc_zip_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fastqc_zip_2.zip`. FastQC report archive for read 2", - "help_text": "Type: `file`, default: `$id.$key.fastqc_zip_2.zip`. FastQC report archive for read 2." + "description": "Type: `file`, default: `fastqc_raw/${id}_r2.fastqc.zip`. FastQC report archive for read 2", + "help_text": "Type: `file`, default: `fastqc_raw/${id}_r2.fastqc.zip`. FastQC report archive for read 2." , - "default":"$id.$key.fastqc_zip_2.zip" + "default":"fastqc_raw/${id}_r2.fastqc.zip" } @@ -1009,10 +1009,10 @@ "trim_html_1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trim_html_1.html`. ", - "help_text": "Type: `file`, default: `$id.$key.trim_html_1.html`. " + "description": "Type: `file`, default: `fastqc_trim/${id}_r1.trimmed_fastqc.html`. ", + "help_text": "Type: `file`, default: `fastqc_trim/${id}_r1.trimmed_fastqc.html`. 
" , - "default":"$id.$key.trim_html_1.html" + "default":"fastqc_trim/${id}_r1.trimmed_fastqc.html" } @@ -1020,10 +1020,10 @@ "trim_html_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trim_html_2.html`. ", - "help_text": "Type: `file`, default: `$id.$key.trim_html_2.html`. " + "description": "Type: `file`, default: `fastqc_trim/${id}_r2.trimmed_fastqc.html`. ", + "help_text": "Type: `file`, default: `fastqc_trim/${id}_r2.trimmed_fastqc.html`. " , - "default":"$id.$key.trim_html_2.html" + "default":"fastqc_trim/${id}_r2.trimmed_fastqc.html" } @@ -1031,10 +1031,10 @@ "trim_zip_1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trim_zip_1.zip`. ", - "help_text": "Type: `file`, default: `$id.$key.trim_zip_1.zip`. " + "description": "Type: `file`, default: `fastqc_trim/${id}_r1.trimmed_fastqc.zip`. ", + "help_text": "Type: `file`, default: `fastqc_trim/${id}_r1.trimmed_fastqc.zip`. " , - "default":"$id.$key.trim_zip_1.zip" + "default":"fastqc_trim/${id}_r1.trimmed_fastqc.zip" } @@ -1042,10 +1042,10 @@ "trim_zip_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trim_zip_2.zip`. ", - "help_text": "Type: `file`, default: `$id.$key.trim_zip_2.zip`. " + "description": "Type: `file`, default: `fastqc_trim/${id}_r2.trimmed_fastqc.zip`. ", + "help_text": "Type: `file`, default: `fastqc_trim/${id}_r2.trimmed_fastqc.zip`. " , - "default":"$id.$key.trim_zip_2.zip" + "default":"fastqc_trim/${id}_r2.trimmed_fastqc.zip" } @@ -1053,10 +1053,10 @@ "trim_log_1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trim_log_1.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.trim_log_1.txt`. " + "description": "Type: `file`, default: `trimgalore/${id}_r1.trimming_report.txt`. ", + "help_text": "Type: `file`, default: `trimgalore/${id}_r1.trimming_report.txt`. 
" , - "default":"$id.$key.trim_log_1.txt" + "default":"trimgalore/${id}_r1.trimming_report.txt" } @@ -1064,10 +1064,10 @@ "trim_log_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trim_log_2.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.trim_log_2.txt`. " + "description": "Type: `file`, default: `trimgalore/${id}_r2.trimming_report.txt`. ", + "help_text": "Type: `file`, default: `trimgalore/${id}_r2.trimming_report.txt`. " , - "default":"$id.$key.trim_log_2.txt" + "default":"trimgalore/${id}_r2.trimming_report.txt" } @@ -1075,10 +1075,10 @@ "fastp_trim_json": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fastp_trim_json.json`. The fastp json format report file name", - "help_text": "Type: `file`, default: `$id.$key.fastp_trim_json.json`. The fastp json format report file name" + "description": "Type: `file`, default: `fastp/$id_out.json`. The fastp json format report file name", + "help_text": "Type: `file`, default: `fastp/$id_out.json`. The fastp json format report file name" , - "default":"$id.$key.fastp_trim_json.json" + "default":"fastp/$id_out.json" } @@ -1086,10 +1086,10 @@ "fastp_trim_html": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fastp_trim_html.html`. The fastp html format report file name", - "help_text": "Type: `file`, default: `$id.$key.fastp_trim_html.html`. The fastp html format report file name" + "description": "Type: `file`, default: `fastp/$id_out.html`. The fastp html format report file name", + "help_text": "Type: `file`, default: `fastp/$id_out.html`. The fastp html format report file name" , - "default":"$id.$key.fastp_trim_html.html" + "default":"fastp/$id_out.html" } @@ -1097,10 +1097,10 @@ "sortmerna_log": { "type": "string", - "description": "Type: `file`, default: `$id.$key.sortmerna_log.log`. Sortmerna log file", - "help_text": "Type: `file`, default: `$id.$key.sortmerna_log.log`. Sortmerna log file." 
+ "description": "Type: `file`, default: `sortmerna/$id.log`. Sortmerna log file", + "help_text": "Type: `file`, default: `sortmerna/$id.log`. Sortmerna log file." , - "default":"$id.$key.sortmerna_log.log" + "default":"sortmerna/$id.log" } @@ -1108,10 +1108,10 @@ "star_alignment": { "type": "string", - "description": "Type: `file`, default: `$id.$key.star_alignment.star_alignment`. ", - "help_text": "Type: `file`, default: `$id.$key.star_alignment.star_alignment`. " + "description": "Type: `file`, default: `STAR/$id`. ", + "help_text": "Type: `file`, default: `STAR/$id`. " , - "default":"$id.$key.star_alignment.star_alignment" + "default":"STAR/$id" } @@ -1119,10 +1119,10 @@ "genome_bam_sorted": { "type": "string", - "description": "Type: `file`, default: `$id.$key.genome_bam_sorted.bam`. ", - "help_text": "Type: `file`, default: `$id.$key.genome_bam_sorted.bam`. " + "description": "Type: `file`, default: `STAR/genome_processed/$id.genome.bam`. ", + "help_text": "Type: `file`, default: `STAR/genome_processed/$id.genome.bam`. " , - "default":"$id.$key.genome_bam_sorted.bam" + "default":"STAR/genome_processed/$id.genome.bam" } @@ -1130,10 +1130,10 @@ "genome_bam_index": { "type": "string", - "description": "Type: `file`, default: `$id.$key.genome_bam_index.bai`. ", - "help_text": "Type: `file`, default: `$id.$key.genome_bam_index.bai`. " + "description": "Type: `file`, default: `STAR/genome_processed/$id.genome.bam.bai`. ", + "help_text": "Type: `file`, default: `STAR/genome_processed/$id.genome.bam.bai`. " , - "default":"$id.$key.genome_bam_index.bai" + "default":"STAR/genome_processed/$id.genome.bam.bai" } @@ -1141,10 +1141,10 @@ "transcriptome_bam": { "type": "string", - "description": "Type: `file`, default: `$id.$key.transcriptome_bam.bam`. ", - "help_text": "Type: `file`, default: `$id.$key.transcriptome_bam.bam`. " + "description": "Type: `file`, default: `STAR/transcriptome_processed/$id.transcriptome.bam`. 
", + "help_text": "Type: `file`, default: `STAR/transcriptome_processed/$id.transcriptome.bam`. " , - "default":"$id.$key.transcriptome_bam.bam" + "default":"STAR/transcriptome_processed/$id.transcriptome.bam" } @@ -1152,10 +1152,10 @@ "transcriptome_bam_index": { "type": "string", - "description": "Type: `file`, default: `$id.$key.transcriptome_bam_index.bai`. ", - "help_text": "Type: `file`, default: `$id.$key.transcriptome_bam_index.bai`. " + "description": "Type: `file`, default: `STAR/transcriptome_processed/$id.transcriptome.bam.bai`. ", + "help_text": "Type: `file`, default: `STAR/transcriptome_processed/$id.transcriptome.bam.bai`. " , - "default":"$id.$key.transcriptome_bam_index.bai" + "default":"STAR/transcriptome_processed/$id.transcriptome.bam.bai" } @@ -1163,10 +1163,10 @@ "star_log": { "type": "string", - "description": "Type: `file`, default: `$id.$key.star_log.log`. ", - "help_text": "Type: `file`, default: `$id.$key.star_log.log`. " + "description": "Type: `file`, default: `STAR/log/$id.log`. ", + "help_text": "Type: `file`, default: `STAR/log/$id.log`. " , - "default":"$id.$key.star_log.log" + "default":"STAR/log/$id.log" } @@ -1174,10 +1174,10 @@ "genome_bam_stats": { "type": "string", - "description": "Type: `file`, default: `$id.$key.genome_bam_stats.stats`. ", - "help_text": "Type: `file`, default: `$id.$key.genome_bam_stats.stats`. " + "description": "Type: `file`, default: `samtools_stats/$id.genome.stats`. ", + "help_text": "Type: `file`, default: `samtools_stats/$id.genome.stats`. " , - "default":"$id.$key.genome_bam_stats.stats" + "default":"samtools_stats/$id.genome.stats" } @@ -1185,10 +1185,10 @@ "genome_bam_flagstat": { "type": "string", - "description": "Type: `file`, default: `$id.$key.genome_bam_flagstat.flagstat`. ", - "help_text": "Type: `file`, default: `$id.$key.genome_bam_flagstat.flagstat`. " + "description": "Type: `file`, default: `samtools_stats/$id.genome.flagstat`. 
", + "help_text": "Type: `file`, default: `samtools_stats/$id.genome.flagstat`. " , - "default":"$id.$key.genome_bam_flagstat.flagstat" + "default":"samtools_stats/$id.genome.flagstat" } @@ -1196,10 +1196,10 @@ "genome_bam_idxstats": { "type": "string", - "description": "Type: `file`, default: `$id.$key.genome_bam_idxstats.idxstats`. ", - "help_text": "Type: `file`, default: `$id.$key.genome_bam_idxstats.idxstats`. " + "description": "Type: `file`, default: `samtools_stats/$id.genome.idxstats`. ", + "help_text": "Type: `file`, default: `samtools_stats/$id.genome.idxstats`. " , - "default":"$id.$key.genome_bam_idxstats.idxstats" + "default":"samtools_stats/$id.genome.idxstats" } @@ -1207,10 +1207,10 @@ "transcriptome_bam_stats": { "type": "string", - "description": "Type: `file`, default: `$id.$key.transcriptome_bam_stats.stats`. ", - "help_text": "Type: `file`, default: `$id.$key.transcriptome_bam_stats.stats`. " + "description": "Type: `file`, default: `samtools_stats/$id.transcriptome.stats`. ", + "help_text": "Type: `file`, default: `samtools_stats/$id.transcriptome.stats`. " , - "default":"$id.$key.transcriptome_bam_stats.stats" + "default":"samtools_stats/$id.transcriptome.stats" } @@ -1218,10 +1218,10 @@ "transcriptome_bam_flagstat": { "type": "string", - "description": "Type: `file`, default: `$id.$key.transcriptome_bam_flagstat.flagstat`. ", - "help_text": "Type: `file`, default: `$id.$key.transcriptome_bam_flagstat.flagstat`. " + "description": "Type: `file`, default: `samtools_stats/$id.transcriptome.flagstat`. ", + "help_text": "Type: `file`, default: `samtools_stats/$id.transcriptome.flagstat`. " , - "default":"$id.$key.transcriptome_bam_flagstat.flagstat" + "default":"samtools_stats/$id.transcriptome.flagstat" } @@ -1229,10 +1229,10 @@ "transcriptome_bam_idxstats": { "type": "string", - "description": "Type: `file`, default: `$id.$key.transcriptome_bam_idxstats.idxstats`. 
", - "help_text": "Type: `file`, default: `$id.$key.transcriptome_bam_idxstats.idxstats`. " + "description": "Type: `file`, default: `samtools_stats/$id.transcriptome.idxstats`. ", + "help_text": "Type: `file`, default: `samtools_stats/$id.transcriptome.idxstats`. " , - "default":"$id.$key.transcriptome_bam_idxstats.idxstats" + "default":"samtools_stats/$id.transcriptome.idxstats" } @@ -1240,10 +1240,10 @@ "salmon_quant_results": { "type": "string", - "description": "Type: `file`, default: `$id.$key.salmon_quant_results.salmon_quant_results`. ", - "help_text": "Type: `file`, default: `$id.$key.salmon_quant_results.salmon_quant_results`. " + "description": "Type: `file`, default: `STAR_Salmon/$id`. ", + "help_text": "Type: `file`, default: `STAR_Salmon/$id`. " , - "default":"$id.$key.salmon_quant_results.salmon_quant_results" + "default":"STAR_Salmon/$id" } @@ -1251,10 +1251,10 @@ "salmon_quant_results_file": { "type": "string", - "description": "Type: `file`, default: `$id.$key.salmon_quant_results_file.sf`. ", - "help_text": "Type: `file`, default: `$id.$key.salmon_quant_results_file.sf`. " + "description": "Type: `file`, default: `STAR_Salmon/$id/quant.sf`. ", + "help_text": "Type: `file`, default: `STAR_Salmon/$id/quant.sf`. " , - "default":"$id.$key.salmon_quant_results_file.sf" + "default":"STAR_Salmon/$id/quant.sf" } @@ -1262,10 +1262,10 @@ "pseudo_quant_results": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_quant_results.pseudo_quant_results`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_quant_results.pseudo_quant_results`. " + "description": "Type: `file`, default: `Pseudo_align_quant/$id`. ", + "help_text": "Type: `file`, default: `Pseudo_align_quant/$id`. " , - "default":"$id.$key.pseudo_quant_results.pseudo_quant_results" + "default":"Pseudo_align_quant/$id" } @@ -1273,10 +1273,10 @@ "rsem_counts_gene": { "type": "string", - "description": "Type: `file`, default: `$id.$key.rsem_counts_gene.results`. 
Expression counts on gene level", - "help_text": "Type: `file`, default: `$id.$key.rsem_counts_gene.results`. Expression counts on gene level" + "description": "Type: `file`, default: `RSEM/$id.genes.results`. Expression counts on gene level", + "help_text": "Type: `file`, default: `RSEM/$id.genes.results`. Expression counts on gene level" , - "default":"$id.$key.rsem_counts_gene.results" + "default":"RSEM/$id.genes.results" } @@ -1284,10 +1284,10 @@ "rsem_counts_transcripts": { "type": "string", - "description": "Type: `file`, default: `$id.$key.rsem_counts_transcripts.results`. Expression counts on transcript level", - "help_text": "Type: `file`, default: `$id.$key.rsem_counts_transcripts.results`. Expression counts on transcript level" + "description": "Type: `file`, default: `RSEM/$id.isoforms.results`. Expression counts on transcript level", + "help_text": "Type: `file`, default: `RSEM/$id.isoforms.results`. Expression counts on transcript level" , - "default":"$id.$key.rsem_counts_transcripts.results" + "default":"RSEM/$id.isoforms.results" } @@ -1295,10 +1295,10 @@ "bam_star_rsem": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bam_star_rsem.bam`. BAM file generated by STAR (from RSEM)", - "help_text": "Type: `file`, default: `$id.$key.bam_star_rsem.bam`. BAM file generated by STAR (from RSEM)" + "description": "Type: `file`, default: `RSEM/$id.STAR.genome.bam`. BAM file generated by STAR (from RSEM)", + "help_text": "Type: `file`, default: `RSEM/$id.STAR.genome.bam`. BAM file generated by STAR (from RSEM)" , - "default":"$id.$key.bam_star_rsem.bam" + "default":"RSEM/$id.STAR.genome.bam" } @@ -1306,10 +1306,10 @@ "bam_genome_rsem": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bam_genome_rsem.bam`. Genome BAM file (from RSEM)", - "help_text": "Type: `file`, default: `$id.$key.bam_genome_rsem.bam`. Genome BAM file (from RSEM)" + "description": "Type: `file`, default: `RSEM/$id.genome.bam`. 
Genome BAM file (from RSEM)", + "help_text": "Type: `file`, default: `RSEM/$id.genome.bam`. Genome BAM file (from RSEM)" , - "default":"$id.$key.bam_genome_rsem.bam" + "default":"RSEM/$id.genome.bam" } @@ -1317,10 +1317,10 @@ "bam_transcript_rsem": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bam_transcript_rsem.bam`. Transcript BAM file (from RSEM)", - "help_text": "Type: `file`, default: `$id.$key.bam_transcript_rsem.bam`. Transcript BAM file (from RSEM)" + "description": "Type: `file`, default: `RSEM/$id.transcript.bam`. Transcript BAM file (from RSEM)", + "help_text": "Type: `file`, default: `RSEM/$id.transcript.bam`. Transcript BAM file (from RSEM)" , - "default":"$id.$key.bam_transcript_rsem.bam" + "default":"RSEM/$id.transcript.bam" } @@ -1328,10 +1328,10 @@ "tpm_gene": { "type": "string", - "description": "Type: `file`, default: `$id.$key.tpm_gene.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.tpm_gene.tsv`. " + "description": "Type: `file`, default: `transcript_quantification/gene_tpm.tsv`. ", + "help_text": "Type: `file`, default: `transcript_quantification/gene_tpm.tsv`. " , - "default":"$id.$key.tpm_gene.tsv" + "default":"transcript_quantification/gene_tpm.tsv" } @@ -1339,10 +1339,10 @@ "counts_gene": { "type": "string", - "description": "Type: `file`, default: `$id.$key.counts_gene.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.counts_gene.tsv`. " + "description": "Type: `file`, default: `transcript_quantification/gene_counts.tsv`. ", + "help_text": "Type: `file`, default: `transcript_quantification/gene_counts.tsv`. " , - "default":"$id.$key.counts_gene.tsv" + "default":"transcript_quantification/gene_counts.tsv" } @@ -1350,10 +1350,10 @@ "counts_gene_length_scaled": { "type": "string", - "description": "Type: `file`, default: `$id.$key.counts_gene_length_scaled.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.counts_gene_length_scaled.tsv`. 
" + "description": "Type: `file`, default: `transcript_quantification/gene_counts_length_scaled.tsv`. ", + "help_text": "Type: `file`, default: `transcript_quantification/gene_counts_length_scaled.tsv`. " , - "default":"$id.$key.counts_gene_length_scaled.tsv" + "default":"transcript_quantification/gene_counts_length_scaled.tsv" } @@ -1361,10 +1361,10 @@ "counts_gene_scaled": { "type": "string", - "description": "Type: `file`, default: `$id.$key.counts_gene_scaled.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.counts_gene_scaled.tsv`. " + "description": "Type: `file`, default: `transcript_quantification/gene_counts_scaled.tsv`. ", + "help_text": "Type: `file`, default: `transcript_quantification/gene_counts_scaled.tsv`. " , - "default":"$id.$key.counts_gene_scaled.tsv" + "default":"transcript_quantification/gene_counts_scaled.tsv" } @@ -1372,10 +1372,10 @@ "tpm_transcript": { "type": "string", - "description": "Type: `file`, default: `$id.$key.tpm_transcript.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.tpm_transcript.tsv`. " + "description": "Type: `file`, default: `transcript_quantification/transcript_tpm.tsv`. ", + "help_text": "Type: `file`, default: `transcript_quantification/transcript_tpm.tsv`. " , - "default":"$id.$key.tpm_transcript.tsv" + "default":"transcript_quantification/transcript_tpm.tsv" } @@ -1383,10 +1383,10 @@ "counts_transcript": { "type": "string", - "description": "Type: `file`, default: `$id.$key.counts_transcript.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.counts_transcript.tsv`. " + "description": "Type: `file`, default: `transcript_quantification/transcript_counts.tsv`. ", + "help_text": "Type: `file`, default: `transcript_quantification/transcript_counts.tsv`. 
" , - "default":"$id.$key.counts_transcript.tsv" + "default":"transcript_quantification/transcript_counts.tsv" } @@ -1394,10 +1394,10 @@ "quant_merged_summarizedexperiment": { "type": "string", - "description": "Type: `file`, default: `$id.$key.quant_merged_summarizedexperiment.quant_merged_summarizedexperiment`. ", - "help_text": "Type: `file`, default: `$id.$key.quant_merged_summarizedexperiment.quant_merged_summarizedexperiment`. " + "description": "Type: `file`, default: `transcript_quantification/summarizedexperiment`. ", + "help_text": "Type: `file`, default: `transcript_quantification/summarizedexperiment`. " , - "default":"$id.$key.quant_merged_summarizedexperiment.quant_merged_summarizedexperiment" + "default":"transcript_quantification/summarizedexperiment" } @@ -1405,10 +1405,10 @@ "markduplicates_metrics": { "type": "string", - "description": "Type: `file`, default: `$id.$key.markduplicates_metrics.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.markduplicates_metrics.txt`. " + "description": "Type: `file`, default: `picard/$id.MarkDuplicates.metrics.txt`. ", + "help_text": "Type: `file`, default: `picard/$id.MarkDuplicates.metrics.txt`. " , - "default":"$id.$key.markduplicates_metrics.txt" + "default":"picard/$id.MarkDuplicates.metrics.txt" } @@ -1416,10 +1416,10 @@ "stringtie_transcript_gtf": { "type": "string", - "description": "Type: `file`, default: `$id.$key.stringtie_transcript_gtf.gtf`. ", - "help_text": "Type: `file`, default: `$id.$key.stringtie_transcript_gtf.gtf`. " + "description": "Type: `file`, default: `stringtie/$id.transcripts.gtf`. ", + "help_text": "Type: `file`, default: `stringtie/$id.transcripts.gtf`. " , - "default":"$id.$key.stringtie_transcript_gtf.gtf" + "default":"stringtie/$id.transcripts.gtf" } @@ -1427,10 +1427,10 @@ "stringtie_coverage_gtf": { "type": "string", - "description": "Type: `file`, default: `$id.$key.stringtie_coverage_gtf.gtf`. 
", - "help_text": "Type: `file`, default: `$id.$key.stringtie_coverage_gtf.gtf`. " + "description": "Type: `file`, default: `stringtie/$id.coverage.gtf`. ", + "help_text": "Type: `file`, default: `stringtie/$id.coverage.gtf`. " , - "default":"$id.$key.stringtie_coverage_gtf.gtf" + "default":"stringtie/$id.coverage.gtf" } @@ -1438,10 +1438,10 @@ "stringtie_abundance": { "type": "string", - "description": "Type: `file`, default: `$id.$key.stringtie_abundance.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.stringtie_abundance.txt`. " + "description": "Type: `file`, default: `stringtie/$id.gene_abundance.txt`. ", + "help_text": "Type: `file`, default: `stringtie/$id.gene_abundance.txt`. " , - "default":"$id.$key.stringtie_abundance.txt" + "default":"stringtie/$id.gene_abundance.txt" } @@ -1449,10 +1449,10 @@ "stringtie_ballgown": { "type": "string", - "description": "Type: `file`, default: `$id.$key.stringtie_ballgown.ballgown`. ", - "help_text": "Type: `file`, default: `$id.$key.stringtie_ballgown.ballgown`. " + "description": "Type: `file`, default: `stringtie/$id.ballgown`. ", + "help_text": "Type: `file`, default: `stringtie/$id.ballgown`. " , - "default":"$id.$key.stringtie_ballgown.ballgown" + "default":"stringtie/$id.ballgown" } @@ -1460,10 +1460,10 @@ "featurecounts": { "type": "string", - "description": "Type: `file`, default: `$id.$key.featurecounts.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.featurecounts.txt`. " + "description": "Type: `file`, default: `featurecounts/$id.featureCounts.txt`. ", + "help_text": "Type: `file`, default: `featurecounts/$id.featureCounts.txt`. " , - "default":"$id.$key.featurecounts.txt" + "default":"featurecounts/$id.featureCounts.txt" } @@ -1471,10 +1471,10 @@ "featurecounts_summary": { "type": "string", - "description": "Type: `file`, default: `$id.$key.featurecounts_summary.summary`. ", - "help_text": "Type: `file`, default: `$id.$key.featurecounts_summary.summary`. 
" + "description": "Type: `file`, default: `featurecounts/$id.featureCounts.txt.summary`. ", + "help_text": "Type: `file`, default: `featurecounts/$id.featureCounts.txt.summary`. " , - "default":"$id.$key.featurecounts_summary.summary" + "default":"featurecounts/$id.featureCounts.txt.summary" } @@ -1482,10 +1482,10 @@ "featurecounts_multiqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.featurecounts_multiqc.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.featurecounts_multiqc.tsv`. " + "description": "Type: `file`, default: `featurecounts/$id.featureCounts_mqc.tsv`. ", + "help_text": "Type: `file`, default: `featurecounts/$id.featureCounts_mqc.tsv`. " , - "default":"$id.$key.featurecounts_multiqc.tsv" + "default":"featurecounts/$id.featureCounts_mqc.tsv" } @@ -1493,10 +1493,10 @@ "featurecounts_rrna_multiqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.featurecounts_rrna_multiqc.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.featurecounts_rrna_multiqc.tsv`. " + "description": "Type: `file`, default: `featurecounts/$id.featureCounts_rrna_mqc.tsv`. ", + "help_text": "Type: `file`, default: `featurecounts/$id.featureCounts_rrna_mqc.tsv`. " , - "default":"$id.$key.featurecounts_rrna_multiqc.tsv" + "default":"featurecounts/$id.featureCounts_rrna_mqc.tsv" } @@ -1504,10 +1504,10 @@ "bedgraph_forward": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bedgraph_forward.bedgraph`. ", - "help_text": "Type: `file`, default: `$id.$key.bedgraph_forward.bedgraph`. " + "description": "Type: `file`, default: `bedgraph/$id.forward.bedgraph`. ", + "help_text": "Type: `file`, default: `bedgraph/$id.forward.bedgraph`. " , - "default":"$id.$key.bedgraph_forward.bedgraph" + "default":"bedgraph/$id.forward.bedgraph" } @@ -1515,10 +1515,10 @@ "bedgraph_reverse": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bedgraph_reverse.bedgraph`. 
", - "help_text": "Type: `file`, default: `$id.$key.bedgraph_reverse.bedgraph`. " + "description": "Type: `file`, default: `bedgraph/$id.reverse.bedgraph`. ", + "help_text": "Type: `file`, default: `bedgraph/$id.reverse.bedgraph`. " , - "default":"$id.$key.bedgraph_reverse.bedgraph" + "default":"bedgraph/$id.reverse.bedgraph" } @@ -1526,10 +1526,10 @@ "bigwig_forward": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bigwig_forward.bigwig`. ", - "help_text": "Type: `file`, default: `$id.$key.bigwig_forward.bigwig`. " + "description": "Type: `file`, default: `bigwig/$id.forward.bigwig`. ", + "help_text": "Type: `file`, default: `bigwig/$id.forward.bigwig`. " , - "default":"$id.$key.bigwig_forward.bigwig" + "default":"bigwig/$id.forward.bigwig" } @@ -1537,10 +1537,10 @@ "bigwig_reverse": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bigwig_reverse.bigwig`. ", - "help_text": "Type: `file`, default: `$id.$key.bigwig_reverse.bigwig`. " + "description": "Type: `file`, default: `bigwig/$id.reverse.bigwig`. ", + "help_text": "Type: `file`, default: `bigwig/$id.reverse.bigwig`. " , - "default":"$id.$key.bigwig_reverse.bigwig" + "default":"bigwig/$id.reverse.bigwig" } @@ -1548,10 +1548,10 @@ "preseq_output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.preseq_output.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.preseq_output.txt`. " + "description": "Type: `file`, default: `preseq/$id.lc_extrap.txt`. ", + "help_text": "Type: `file`, default: `preseq/$id.lc_extrap.txt`. " , - "default":"$id.$key.preseq_output.txt" + "default":"preseq/$id.lc_extrap.txt" } @@ -1559,10 +1559,10 @@ "bamstat_output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bamstat_output.txt`. Path to output file (txt) of mapping quality statistics", - "help_text": "Type: `file`, default: `$id.$key.bamstat_output.txt`. 
Path to output file (txt) of mapping quality statistics" + "description": "Type: `file`, default: `RSeQC/bamstat/$id.mapping_quality.txt`. Path to output file (txt) of mapping quality statistics", + "help_text": "Type: `file`, default: `RSeQC/bamstat/$id.mapping_quality.txt`. Path to output file (txt) of mapping quality statistics" , - "default":"$id.$key.bamstat_output.txt" + "default":"RSeQC/bamstat/$id.mapping_quality.txt" } @@ -1570,10 +1570,10 @@ "strandedness_output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.strandedness_output.txt`. Path to output report (txt) of inferred strandedness", - "help_text": "Type: `file`, default: `$id.$key.strandedness_output.txt`. Path to output report (txt) of inferred strandedness" + "description": "Type: `file`, default: `RSeQC/inferexperiment/$id.strandedness.txt`. Path to output report (txt) of inferred strandedness", + "help_text": "Type: `file`, default: `RSeQC/inferexperiment/$id.strandedness.txt`. Path to output report (txt) of inferred strandedness" , - "default":"$id.$key.strandedness_output.txt" + "default":"RSeQC/inferexperiment/$id.strandedness.txt" } @@ -1581,10 +1581,10 @@ "inner_dist_output_stats": { "type": "string", - "description": "Type: `file`, default: `$id.$key.inner_dist_output_stats.stats`. output file (txt) with summary statistics of inner distances of paired reads", - "help_text": "Type: `file`, default: `$id.$key.inner_dist_output_stats.stats`. output file (txt) with summary statistics of inner distances of paired reads" + "description": "Type: `file`, default: `RSeQC/innerdistance/$id.inner_distance.stats`. output file (txt) with summary statistics of inner distances of paired reads", + "help_text": "Type: `file`, default: `RSeQC/innerdistance/$id.inner_distance.stats`. 
output file (txt) with summary statistics of inner distances of paired reads" , - "default":"$id.$key.inner_dist_output_stats.stats" + "default":"RSeQC/innerdistance/$id.inner_distance.stats" } @@ -1592,10 +1592,10 @@ "inner_dist_output_dist": { "type": "string", - "description": "Type: `file`, default: `$id.$key.inner_dist_output_dist.txt`. output file (txt) with inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.inner_dist_output_dist.txt`. output file (txt) with inner distances of all paired reads" + "description": "Type: `file`, default: `RSeQC/innerdistance/txt/$id.inner_distance.txt`. output file (txt) with inner distances of all paired reads", + "help_text": "Type: `file`, default: `RSeQC/innerdistance/txt/$id.inner_distance.txt`. output file (txt) with inner distances of all paired reads" , - "default":"$id.$key.inner_dist_output_dist.txt" + "default":"RSeQC/innerdistance/txt/$id.inner_distance.txt" } @@ -1603,10 +1603,10 @@ "inner_dist_output_freq": { "type": "string", - "description": "Type: `file`, default: `$id.$key.inner_dist_output_freq.txt`. output file (txt) with frequencies of inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.inner_dist_output_freq.txt`. output file (txt) with frequencies of inner distances of all paired reads" + "description": "Type: `file`, default: `RSeQC/innerdistance/txt/$id.inner_distance_freq.txt`. output file (txt) with frequencies of inner distances of all paired reads", + "help_text": "Type: `file`, default: `RSeQC/innerdistance/txt/$id.inner_distance_freq.txt`. output file (txt) with frequencies of inner distances of all paired reads" , - "default":"$id.$key.inner_dist_output_freq.txt" + "default":"RSeQC/innerdistance/txt/$id.inner_distance_freq.txt" } @@ -1614,10 +1614,10 @@ "inner_dist_output_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.inner_dist_output_plot.pdf`. 
output file (pdf) with histogram plot of of inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.inner_dist_output_plot.pdf`. output file (pdf) with histogram plot of of inner distances of all paired reads" + "description": "Type: `file`, default: `RSeQC/innerdistance/pdf/$id.inner_distance_plot.pdf`. output file (pdf) with histogram plot of of inner distances of all paired reads", + "help_text": "Type: `file`, default: `RSeQC/innerdistance/pdf/$id.inner_distance_plot.pdf`. output file (pdf) with histogram plot of of inner distances of all paired reads" , - "default":"$id.$key.inner_dist_output_plot.pdf" + "default":"RSeQC/innerdistance/pdf/$id.inner_distance_plot.pdf" } @@ -1625,10 +1625,10 @@ "inner_dist_output_plot_r": { "type": "string", - "description": "Type: `file`, default: `$id.$key.inner_dist_output_plot_r.r`. output file (R) with script of histogram plot of of inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.inner_dist_output_plot_r.r`. output file (R) with script of histogram plot of of inner distances of all paired reads" + "description": "Type: `file`, default: `RSeQC/innerdistance/rscript/$id.inner_distance_plot.r`. output file (R) with script of histogram plot of of inner distances of all paired reads", + "help_text": "Type: `file`, default: `RSeQC/innerdistance/rscript/$id.inner_distance_plot.r`. output file (R) with script of histogram plot of of inner distances of all paired reads" , - "default":"$id.$key.inner_dist_output_plot_r.r" + "default":"RSeQC/innerdistance/rscript/$id.inner_distance_plot.r" } @@ -1636,10 +1636,10 @@ "junction_annotation_output_log": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_annotation_output_log.log`. output log of junction annotation script", - "help_text": "Type: `file`, default: `$id.$key.junction_annotation_output_log.log`. 
output log of junction annotation script" + "description": "Type: `file`, default: `RSeQC/junctionannotation/log/$id.junction_annotation.log`. output log of junction annotation script", + "help_text": "Type: `file`, default: `RSeQC/junctionannotation/log/$id.junction_annotation.log`. output log of junction annotation script" , - "default":"$id.$key.junction_annotation_output_log.log" + "default":"RSeQC/junctionannotation/log/$id.junction_annotation.log" } @@ -1647,10 +1647,10 @@ "junction_annotation_output_plot_r": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_annotation_output_plot_r.r`. R script to generate splice_junction and splice_events plot", - "help_text": "Type: `file`, default: `$id.$key.junction_annotation_output_plot_r.r`. R script to generate splice_junction and splice_events plot" + "description": "Type: `file`, default: `RSeQC/junctionannotation/rscript/$id.junction_annotation_plot.r`. R script to generate splice_junction and splice_events plot", + "help_text": "Type: `file`, default: `RSeQC/junctionannotation/rscript/$id.junction_annotation_plot.r`. R script to generate splice_junction and splice_events plot" , - "default":"$id.$key.junction_annotation_output_plot_r.r" + "default":"RSeQC/junctionannotation/rscript/$id.junction_annotation_plot.r" } @@ -1658,10 +1658,10 @@ "junction_annotation_output_junction_bed": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_annotation_output_junction_bed.bed`. junction annotation file (bed format)", - "help_text": "Type: `file`, default: `$id.$key.junction_annotation_output_junction_bed.bed`. junction annotation file (bed format)" + "description": "Type: `file`, default: `RSeQC/junctionannotation/bed/$id.junction_annotation.bed`. junction annotation file (bed format)", + "help_text": "Type: `file`, default: `RSeQC/junctionannotation/bed/$id.junction_annotation.bed`. 
junction annotation file (bed format)" , - "default":"$id.$key.junction_annotation_output_junction_bed.bed" + "default":"RSeQC/junctionannotation/bed/$id.junction_annotation.bed" } @@ -1669,10 +1669,10 @@ "junction_annotation_output_junction_interact": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_annotation_output_junction_interact.bed`. interact file (bed format) of junctions", - "help_text": "Type: `file`, default: `$id.$key.junction_annotation_output_junction_interact.bed`. interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization." + "description": "Type: `file`, default: `RSeQC/junctionannotation/bed/$id.junction_annotation.Interact.bed`. interact file (bed format) of junctions", + "help_text": "Type: `file`, default: `RSeQC/junctionannotation/bed/$id.junction_annotation.Interact.bed`. interact file (bed format) of junctions. Can be uploaded to UCSC genome browser or converted to bigInteract (using bedToBigBed program) for visualization." , - "default":"$id.$key.junction_annotation_output_junction_interact.bed" + "default":"RSeQC/junctionannotation/bed/$id.junction_annotation.Interact.bed" } @@ -1680,10 +1680,10 @@ "junction_annotation_output_junction_sheet": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_annotation_output_junction_sheet.xls`. junction annotation file (xls format)", - "help_text": "Type: `file`, default: `$id.$key.junction_annotation_output_junction_sheet.xls`. junction annotation file (xls format)" + "description": "Type: `file`, default: `RSeQC/junctionannotation/xls/$id.junction_annotation.xls`. junction annotation file (xls format)", + "help_text": "Type: `file`, default: `RSeQC/junctionannotation/xls/$id.junction_annotation.xls`. 
junction annotation file (xls format)" , - "default":"$id.$key.junction_annotation_output_junction_sheet.xls" + "default":"RSeQC/junctionannotation/xls/$id.junction_annotation.xls" } @@ -1691,10 +1691,10 @@ "junction_annotation_output_splice_events_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_annotation_output_splice_events_plot.pdf`. plot of splice events (pdf)", - "help_text": "Type: `file`, default: `$id.$key.junction_annotation_output_splice_events_plot.pdf`. plot of splice events (pdf)" + "description": "Type: `file`, default: `RSeQC/junctionannotation/pdf/$id.splice_events.pdf`. plot of splice events (pdf)", + "help_text": "Type: `file`, default: `RSeQC/junctionannotation/pdf/$id.splice_events.pdf`. plot of splice events (pdf)" , - "default":"$id.$key.junction_annotation_output_splice_events_plot.pdf" + "default":"RSeQC/junctionannotation/pdf/$id.splice_events.pdf" } @@ -1702,10 +1702,10 @@ "junction_annotation_output_splice_junctions_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_annotation_output_splice_junctions_plot.pdf`. plot of junctions (pdf)", - "help_text": "Type: `file`, default: `$id.$key.junction_annotation_output_splice_junctions_plot.pdf`. plot of junctions (pdf)" + "description": "Type: `file`, default: `RSeQC/junctionannotation/pdf/$id.splice_junctions_plot.pdf`. plot of junctions (pdf)", + "help_text": "Type: `file`, default: `RSeQC/junctionannotation/pdf/$id.splice_junctions_plot.pdf`. plot of junctions (pdf)" , - "default":"$id.$key.junction_annotation_output_splice_junctions_plot.pdf" + "default":"RSeQC/junctionannotation/pdf/$id.splice_junctions_plot.pdf" } @@ -1713,10 +1713,10 @@ "junction_saturation_output_plot_r": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_saturation_output_plot_r.r`. 
r script to generate junction_saturation_plot plot", - "help_text": "Type: `file`, default: `$id.$key.junction_saturation_output_plot_r.r`. r script to generate junction_saturation_plot plot" + "description": "Type: `file`, default: `RSeQC/junctionsaturation/rscript/$id.junction_saturation_plot.r`. r script to generate junction_saturation_plot plot", + "help_text": "Type: `file`, default: `RSeQC/junctionsaturation/rscript/$id.junction_saturation_plot.r`. r script to generate junction_saturation_plot plot" , - "default":"$id.$key.junction_saturation_output_plot_r.r" + "default":"RSeQC/junctionsaturation/rscript/$id.junction_saturation_plot.r" } @@ -1724,10 +1724,10 @@ "junction_saturation_output_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.junction_saturation_output_plot.pdf`. plot of junction saturation (pdf", - "help_text": "Type: `file`, default: `$id.$key.junction_saturation_output_plot.pdf`. plot of junction saturation (pdf" + "description": "Type: `file`, default: `RSeQC/junctionsaturation/pdf/$id.junction_saturation_plot.pdf`. plot of junction saturation (pdf", + "help_text": "Type: `file`, default: `RSeQC/junctionsaturation/pdf/$id.junction_saturation_plot.pdf`. plot of junction saturation (pdf" , - "default":"$id.$key.junction_saturation_output_plot.pdf" + "default":"RSeQC/junctionsaturation/pdf/$id.junction_saturation_plot.pdf" } @@ -1735,10 +1735,10 @@ "read_distribution_output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.read_distribution_output.txt`. output file (txt) of read distribution analysis", - "help_text": "Type: `file`, default: `$id.$key.read_distribution_output.txt`. output file (txt) of read distribution analysis." + "description": "Type: `file`, default: `RSeQC/readdistribution/$id.read_distribution.txt`. output file (txt) of read distribution analysis", + "help_text": "Type: `file`, default: `RSeQC/readdistribution/$id.read_distribution.txt`. 
output file (txt) of read distribution analysis." , - "default":"$id.$key.read_distribution_output.txt" + "default":"RSeQC/readdistribution/$id.read_distribution.txt" } @@ -1746,10 +1746,10 @@ "read_duplication_output_duplication_rate_plot_r": { "type": "string", - "description": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_plot_r.r`. R script for generating duplication rate plot", - "help_text": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_plot_r.r`. R script for generating duplication rate plot" + "description": "Type: `file`, default: `RSeQC/readduplication/rscrpt/$id.duplication_rate_plot.r`. R script for generating duplication rate plot", + "help_text": "Type: `file`, default: `RSeQC/readduplication/rscrpt/$id.duplication_rate_plot.r`. R script for generating duplication rate plot" , - "default":"$id.$key.read_duplication_output_duplication_rate_plot_r.r" + "default":"RSeQC/readduplication/rscrpt/$id.duplication_rate_plot.r" } @@ -1757,10 +1757,10 @@ "read_duplication_output_duplication_rate_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_plot.pdf`. duplication rate plot (pdf)", - "help_text": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_plot.pdf`. duplication rate plot (pdf)" + "description": "Type: `file`, default: `RSeQC/readduplication/pdf/$id.duplication_rate_plot.pdf`. duplication rate plot (pdf)", + "help_text": "Type: `file`, default: `RSeQC/readduplication/pdf/$id.duplication_rate_plot.pdf`. duplication rate plot (pdf)" , - "default":"$id.$key.read_duplication_output_duplication_rate_plot.pdf" + "default":"RSeQC/readduplication/pdf/$id.duplication_rate_plot.pdf" } @@ -1768,10 +1768,10 @@ "read_duplication_output_duplication_rate_mapping": { "type": "string", - "description": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_mapping.xls`. 
Summary of mapping-based read duplication", - "help_text": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_mapping.xls`. Summary of mapping-based read duplication" + "description": "Type: `file`, default: `RSeQC/readduplication/xls/$id.duplication_rate_mapping.xls`. Summary of mapping-based read duplication", + "help_text": "Type: `file`, default: `RSeQC/readduplication/xls/$id.duplication_rate_mapping.xls`. Summary of mapping-based read duplication" , - "default":"$id.$key.read_duplication_output_duplication_rate_mapping.xls" + "default":"RSeQC/readduplication/xls/$id.duplication_rate_mapping.xls" } @@ -1779,10 +1779,10 @@ "read_duplication_output_duplication_rate_sequence": { "type": "string", - "description": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_sequence.xls`. Summary of sequencing-based read duplication", - "help_text": "Type: `file`, default: `$id.$key.read_duplication_output_duplication_rate_sequence.xls`. Summary of sequencing-based read duplication" + "description": "Type: `file`, default: `RSeQC/readduplication/xls/$id.duplication_rate_sequencing.xls`. Summary of sequencing-based read duplication", + "help_text": "Type: `file`, default: `RSeQC/readduplication/xls/$id.duplication_rate_sequencing.xls`. Summary of sequencing-based read duplication" , - "default":"$id.$key.read_duplication_output_duplication_rate_sequence.xls" + "default":"RSeQC/readduplication/xls/$id.duplication_rate_sequencing.xls" } @@ -1790,10 +1790,10 @@ "tin_output_summary": { "type": "string", - "description": "Type: `file`, default: `$id.$key.tin_output_summary.txt`. summary statistics (txt) of calculated TIN metrics", - "help_text": "Type: `file`, default: `$id.$key.tin_output_summary.txt`. summary statistics (txt) of calculated TIN metrics" + "description": "Type: `file`, default: `RSeQC/tin/txt/$id.tin_summary.txt`. 
summary statistics (txt) of calculated TIN metrics", + "help_text": "Type: `file`, default: `RSeQC/tin/txt/$id.tin_summary.txt`. summary statistics (txt) of calculated TIN metrics" , - "default":"$id.$key.tin_output_summary.txt" + "default":"RSeQC/tin/txt/$id.tin_summary.txt" } @@ -1801,10 +1801,10 @@ "tin_output_metrics": { "type": "string", - "description": "Type: `file`, default: `$id.$key.tin_output_metrics.xls`. file with TIN metrics (xls)", - "help_text": "Type: `file`, default: `$id.$key.tin_output_metrics.xls`. file with TIN metrics (xls)" + "description": "Type: `file`, default: `RSeQC/tin/xls/$id.tin.xls`. file with TIN metrics (xls)", + "help_text": "Type: `file`, default: `RSeQC/tin/xls/$id.tin.xls`. file with TIN metrics (xls)" , - "default":"$id.$key.tin_output_metrics.xls" + "default":"RSeQC/tin/xls/$id.tin.xls" } @@ -1812,10 +1812,10 @@ "dupradar_output_dupmatrix": { "type": "string", - "description": "Type: `file`, default: `$id.$key.dupradar_output_dupmatrix.txt`. path to output file (txt) of duplicate tag counts", - "help_text": "Type: `file`, default: `$id.$key.dupradar_output_dupmatrix.txt`. path to output file (txt) of duplicate tag counts" + "description": "Type: `file`, default: `dupradar/gene_data/$id.dup_matrix.txt`. path to output file (txt) of duplicate tag counts", + "help_text": "Type: `file`, default: `dupradar/gene_data/$id.dup_matrix.txt`. path to output file (txt) of duplicate tag counts" , - "default":"$id.$key.dupradar_output_dupmatrix.txt" + "default":"dupradar/gene_data/$id.dup_matrix.txt" } @@ -1823,10 +1823,10 @@ "dupradar_output_dup_intercept_mqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.dupradar_output_dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar", - "help_text": "Type: `file`, default: `$id.$key.dupradar_output_dup_intercept_mqc.txt`. 
path to output file (txt) of multiqc intercept value DupRadar" + "description": "Type: `file`, default: `dupradar/mqc_intercept/$id.dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar", + "help_text": "Type: `file`, default: `dupradar/mqc_intercept/$id.dup_intercept_mqc.txt`. path to output file (txt) of multiqc intercept value DupRadar" , - "default":"$id.$key.dupradar_output_dup_intercept_mqc.txt" + "default":"dupradar/mqc_intercept/$id.dup_intercept_mqc.txt" } @@ -1834,10 +1834,10 @@ "dupradar_output_duprate_exp_boxplot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.dupradar_output_duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot", - "help_text": "Type: `file`, default: `$id.$key.dupradar_output_duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot" + "description": "Type: `file`, default: `dupradar/box_plot/$id.duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot", + "help_text": "Type: `file`, default: `dupradar/box_plot/$id.duprate_exp_boxplot.pdf`. path to output file (pdf) of distribution of expression box plot" , - "default":"$id.$key.dupradar_output_duprate_exp_boxplot.pdf" + "default":"dupradar/box_plot/$id.duprate_exp_boxplot.pdf" } @@ -1845,10 +1845,10 @@ "dupradar_output_duprate_exp_densplot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.dupradar_output_duprate_exp_densplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts", - "help_text": "Type: `file`, default: `$id.$key.dupradar_output_duprate_exp_densplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts" + "description": "Type: `file`, default: `dupradar/scatter_plot/$id.duprate_exp_densityplot.pdf`. 
path to output file (pdf) of 2D density scatter plot of duplicate tag counts", + "help_text": "Type: `file`, default: `dupradar/scatter_plot/$id.duprate_exp_densityplot.pdf`. path to output file (pdf) of 2D density scatter plot of duplicate tag counts" , - "default":"$id.$key.dupradar_output_duprate_exp_densplot.pdf" + "default":"dupradar/scatter_plot/$id.duprate_exp_densityplot.pdf" } @@ -1856,10 +1856,10 @@ "dupradar_output_duprate_exp_denscurve_mqc": { "type": "string", - "description": "Type: `file`, default: `$id.$key.dupradar_output_duprate_exp_denscurve_mqc.pdf`. path to output file (pdf) of density curve of gene duplication multiqc", - "help_text": "Type: `file`, default: `$id.$key.dupradar_output_duprate_exp_denscurve_mqc.pdf`. path to output file (pdf) of density curve of gene duplication multiqc" + "description": "Type: `file`, default: `dupradar/density_curve/$id.duprate_exp_density_curve_mqc.pdf`. path to output file (pdf) of density curve of gene duplication multiqc", + "help_text": "Type: `file`, default: `dupradar/density_curve/$id.duprate_exp_density_curve_mqc.pdf`. path to output file (pdf) of density curve of gene duplication multiqc" , - "default":"$id.$key.dupradar_output_duprate_exp_denscurve_mqc.pdf" + "default":"dupradar/density_curve/$id.duprate_exp_density_curve_mqc.pdf" } @@ -1867,10 +1867,10 @@ "dupradar_output_expression_histogram": { "type": "string", - "description": "Type: `file`, default: `$id.$key.dupradar_output_expression_histogram.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram", - "help_text": "Type: `file`, default: `$id.$key.dupradar_output_expression_histogram.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram" + "description": "Type: `file`, default: `dupradar/histogram/$id.expression_hist.pdf`. path to output file (pdf) of distribution of RPK values per gene histogram", + "help_text": "Type: `file`, default: `dupradar/histogram/$id.expression_hist.pdf`. 
path to output file (pdf) of distribution of RPK values per gene histogram" , - "default":"$id.$key.dupradar_output_expression_histogram.pdf" + "default":"dupradar/histogram/$id.expression_hist.pdf" } @@ -1878,10 +1878,10 @@ "dupradar_output_intercept_slope": { "type": "string", - "description": "Type: `file`, default: `$id.$key.dupradar_output_intercept_slope.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.dupradar_output_intercept_slope.txt`. " + "description": "Type: `file`, default: `dupradar/intercept_slope/$id.intercept_slope.txt`. ", + "help_text": "Type: `file`, default: `dupradar/intercept_slope/$id.intercept_slope.txt`. " , - "default":"$id.$key.dupradar_output_intercept_slope.txt" + "default":"dupradar/intercept_slope/$id.intercept_slope.txt" } @@ -1889,10 +1889,10 @@ "qualimap_qc_report": { "type": "string", - "description": "Type: `file`, default: `$id.$key.qualimap_qc_report.txt`. Text file containing the RNAseq QC results", - "help_text": "Type: `file`, default: `$id.$key.qualimap_qc_report.txt`. Text file containing the RNAseq QC results." + "description": "Type: `file`, default: `Qualimap/$id.rnaseq_qc_results.txt`. Text file containing the RNAseq QC results", + "help_text": "Type: `file`, default: `Qualimap/$id.rnaseq_qc_results.txt`. Text file containing the RNAseq QC results." , - "default":"$id.$key.qualimap_qc_report.txt" + "default":"Qualimap/$id.rnaseq_qc_results.txt" } @@ -1900,10 +1900,10 @@ "qualimap_counts": { "type": "string", - "description": "Type: `file`, default: `$id.$key.qualimap_counts.txt`. Output file for computed counts", - "help_text": "Type: `file`, default: `$id.$key.qualimap_counts.txt`. Output file for computed counts." + "description": "Type: `file`, default: `Qualimap/$id.counts.txt`. Output file for computed counts", + "help_text": "Type: `file`, default: `Qualimap/$id.counts.txt`. Output file for computed counts." 
, - "default":"$id.$key.qualimap_counts.txt" + "default":"Qualimap/$id.counts.txt" } @@ -1911,10 +1911,10 @@ "qualimap_report": { "type": "string", - "description": "Type: `file`, default: `$id.$key.qualimap_report.html`. Report output file", - "help_text": "Type: `file`, default: `$id.$key.qualimap_report.html`. Report output file. Supported formats are PDF or HTML." + "description": "Type: `file`, default: `Qualimap/$id.report.html`. Report output file", + "help_text": "Type: `file`, default: `Qualimap/$id.report.html`. Report output file. Supported formats are PDF or HTML." , - "default":"$id.$key.qualimap_report.html" + "default":"Qualimap/$id.report.html" } @@ -1922,10 +1922,10 @@ "deseq2_output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.deseq2_output.deseq2_output`. ", - "help_text": "Type: `file`, default: `$id.$key.deseq2_output.deseq2_output`. " + "description": "Type: `file`, default: `deseq2_qc`. ", + "help_text": "Type: `file`, default: `deseq2_qc`. " , - "default":"$id.$key.deseq2_output.deseq2_output" + "default":"deseq2_qc" } @@ -1933,10 +1933,10 @@ "deseq2_output_pseudo": { "type": "string", - "description": "Type: `file`, default: `$id.$key.deseq2_output_pseudo.deseq2_output_pseudo`. ", - "help_text": "Type: `file`, default: `$id.$key.deseq2_output_pseudo.deseq2_output_pseudo`. " + "description": "Type: `file`, default: `deseq2_qc_pseudo`. ", + "help_text": "Type: `file`, default: `deseq2_qc_pseudo`. " , - "default":"$id.$key.deseq2_output_pseudo.deseq2_output_pseudo" + "default":"deseq2_qc_pseudo" } @@ -1944,10 +1944,10 @@ "multiqc_report": { "type": "string", - "description": "Type: `file`, default: `$id.$key.multiqc_report.html`. ", - "help_text": "Type: `file`, default: `$id.$key.multiqc_report.html`. " + "description": "Type: `file`, default: `multiqc/multiqc_report.html`. ", + "help_text": "Type: `file`, default: `multiqc/multiqc_report.html`. 
" , - "default":"$id.$key.multiqc_report.html" + "default":"multiqc/multiqc_report.html" } @@ -1955,10 +1955,10 @@ "multiqc_data": { "type": "string", - "description": "Type: `file`, default: `$id.$key.multiqc_data.multiqc_data`. ", - "help_text": "Type: `file`, default: `$id.$key.multiqc_data.multiqc_data`. " + "description": "Type: `file`, default: `multiqc/multiqc_data`. ", + "help_text": "Type: `file`, default: `multiqc/multiqc_data`. " , - "default":"$id.$key.multiqc_data.multiqc_data" + "default":"multiqc/multiqc_data" } @@ -1966,10 +1966,10 @@ "multiqc_plots": { "type": "string", - "description": "Type: `file`, default: `$id.$key.multiqc_plots.multiqc_plots`. ", - "help_text": "Type: `file`, default: `$id.$key.multiqc_plots.multiqc_plots`. " + "description": "Type: `file`, default: `multiqc/multiqc_plots`. ", + "help_text": "Type: `file`, default: `multiqc/multiqc_plots`. " , - "default":"$id.$key.multiqc_plots.multiqc_plots" + "default":"multiqc/multiqc_plots" } @@ -1977,10 +1977,10 @@ "multiqc_versions": { "type": "string", - "description": "Type: `file`, default: `$id.$key.multiqc_versions.multiqc_versions`. ", - "help_text": "Type: `file`, default: `$id.$key.multiqc_versions.multiqc_versions`. " + "description": "Type: `file`, default: `$id.$key.multiqc_versions`. ", + "help_text": "Type: `file`, default: `$id.$key.multiqc_versions`. " , - "default":"$id.$key.multiqc_versions.multiqc_versions" + "default":"$id.$key.multiqc_versions" } @@ -1988,10 +1988,10 @@ "pseudo_counts_gene": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_counts_gene.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_counts_gene.tsv`. " + "description": "Type: `file`, default: `pseudo_alignment_quantification/gene_counts.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_alignment_quantification/gene_counts.tsv`. 
" , - "default":"$id.$key.pseudo_counts_gene.tsv" + "default":"pseudo_alignment_quantification/gene_counts.tsv" } @@ -1999,10 +1999,10 @@ "pseudo_counts_gene_length_scaled": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_counts_gene_length_scaled.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_counts_gene_length_scaled.tsv`. " + "description": "Type: `file`, default: `pseudo_alignment_quantification/gene_counts_length_scaled.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_alignment_quantification/gene_counts_length_scaled.tsv`. " , - "default":"$id.$key.pseudo_counts_gene_length_scaled.tsv" + "default":"pseudo_alignment_quantification/gene_counts_length_scaled.tsv" } @@ -2010,10 +2010,10 @@ "pseudo_counts_gene_scaled": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_counts_gene_scaled.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_counts_gene_scaled.tsv`. " + "description": "Type: `file`, default: `pseudo_alignment_quantification/gene_counts_scaled.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_alignment_quantification/gene_counts_scaled.tsv`. " , - "default":"$id.$key.pseudo_counts_gene_scaled.tsv" + "default":"pseudo_alignment_quantification/gene_counts_scaled.tsv" } @@ -2021,10 +2021,10 @@ "pseudo_tpm_gene": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_tpm_gene.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_tpm_gene.tsv`. " + "description": "Type: `file`, default: `pseudo_alignment_quantification/gene_tpm.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_alignment_quantification/gene_tpm.tsv`. " , - "default":"$id.$key.pseudo_tpm_gene.tsv" + "default":"pseudo_alignment_quantification/gene_tpm.tsv" } @@ -2032,10 +2032,10 @@ "pseudo_tpm_transcript": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_tpm_transcript.tsv`. 
", - "help_text": "Type: `file`, default: `$id.$key.pseudo_tpm_transcript.tsv`. " + "description": "Type: `file`, default: `pseudo_alignment_quantification/transcript_tpm.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_alignment_quantification/transcript_tpm.tsv`. " , - "default":"$id.$key.pseudo_tpm_transcript.tsv" + "default":"pseudo_alignment_quantification/transcript_tpm.tsv" } @@ -2043,10 +2043,10 @@ "pseudo_counts_transcript": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_counts_transcript.tsv`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_counts_transcript.tsv`. " + "description": "Type: `file`, default: `pseudo_alignment_quantification/transcript_counts.tsv`. ", + "help_text": "Type: `file`, default: `pseudo_alignment_quantification/transcript_counts.tsv`. " , - "default":"$id.$key.pseudo_counts_transcript.tsv" + "default":"pseudo_alignment_quantification/transcript_counts.tsv" } @@ -2054,10 +2054,10 @@ "pseudo_quant_merged_summarizedexperiment": { "type": "string", - "description": "Type: `file`, default: `$id.$key.pseudo_quant_merged_summarizedexperiment.pseudo_quant_merged_summarizedexperiment`. ", - "help_text": "Type: `file`, default: `$id.$key.pseudo_quant_merged_summarizedexperiment.pseudo_quant_merged_summarizedexperiment`. " + "description": "Type: `file`, default: `pseudo_alignment_quantification/quant_merged_summarizedexperiment`. ", + "help_text": "Type: `file`, default: `pseudo_alignment_quantification/quant_merged_summarizedexperiment`. " , - "default":"$id.$key.pseudo_quant_merged_summarizedexperiment.pseudo_quant_merged_summarizedexperiment" + "default":"pseudo_alignment_quantification/quant_merged_summarizedexperiment" }