diff --git a/CHANGELOG.md b/CHANGELOG.md index 46e5be4..9455be8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# demultiplex v0.3.9 + +## Minor changes + +* Bump viash to 0.9.3 (PR #43). + # demultiplex v0.3.8 ## Bug fixes diff --git a/_viash.yaml b/_viash.yaml index 5c86251..4a6eaaa 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -11,7 +11,7 @@ info: - path: gs://viash-hub-test-data/demultiplex/v2/ dest: testData -viash_version: 0.9.0 +viash_version: 0.9.3 config_mods: | .requirements.commands := ['ps'] diff --git a/target/executable/io/interop_summary_to_csv/.config.vsh.yaml b/target/executable/io/interop_summary_to_csv/.config.vsh.yaml index 17c72b6..e25c640 100644 --- a/target/executable/io/interop_summary_to_csv/.config.vsh.yaml +++ b/target/executable/io/interop_summary_to_csv/.config.vsh.yaml @@ -43,6 +43,9 @@ resources: dest: "nextflow_labels.config" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -145,10 +148,10 @@ build_info: engine: "docker|native" output: "target/executable/io/interop_summary_to_csv" executable: "target/executable/io/interop_summary_to_csv/interop_summary_to_csv" - viash_version: "0.9.0" - git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4" + viash_version: "0.9.3" + git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0" git_remote: "https://github.com/viash-hub/demultiplex" - git_tag: "v0.1.1-21-g0c27ec1" + git_tag: "v0.1.1-22-g162497a" package_config: name: "demultiplex" version: "main" @@ -157,7 +160,7 @@ package_config: test_resources: - path: "gs://viash-hub-test-data/demultiplex/v2/" dest: "testData" - viash_version: "0.9.0" + viash_version: "0.9.3" source: "src" target: "target" config_mods: diff --git a/target/executable/io/interop_summary_to_csv/interop_summary_to_csv b/target/executable/io/interop_summary_to_csv/interop_summary_to_csv index d1a534b..e273a30 100755 --- a/target/executable/io/interop_summary_to_csv/interop_summary_to_csv +++ b/target/executable/io/interop_summary_to_csv/interop_summary_to_csv @@ -2,7 +2,7 @@ # interop_summary_to_csv main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,22 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "interop_summary_to_csv main" - echo "" - echo "Input arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " Sequencing run folder (*not* InterOp folder)." - echo "" - echo "Output arguments:" - echo " --output_run_summary" - echo " type: file, required parameter, output, file must exist" - echo "" - echo " --output_index_summary" - echo " type: file, required parameter, output, file must exist" -} # initialise variables VIASH_MODE='run' @@ -470,9 +454,9 @@ tar -C /tmp/ --no-same-owner --no-same-permissions -xvf /tmp/interop.tar.gz && \ mv /tmp/interop-1.3.1-Linux-GNU/bin/index-summary /tmp/interop-1.3.1-Linux-GNU/bin/summary /usr/local/bin/ LABEL org.opencontainers.image.description="Companion container for running component io interop_summary_to_csv" -LABEL org.opencontainers.image.created="2025-03-27T15:56:06Z" +LABEL org.opencontainers.image.created="2025-04-22T11:54:46Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex" -LABEL org.opencontainers.image.revision="0c27ec143671b3d9c985f2b5dc92f2a2065349d4" +LABEL org.opencontainers.image.revision="162497ab73faf321d5166fe34cd1f6976b14dcb0" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -587,6 +571,48 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "interop_summary_to_csv main" + echo "" + echo "Input arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " Sequencing run folder (*not* InterOp folder)." + echo "" + echo "Output arguments:" + echo " --output_run_summary" + echo " type: file, required parameter, output, file must exist" + echo "" + echo " --output_index_summary" + echo " type: file, required parameter, output, file must exist" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/io/publish/.config.vsh.yaml b/target/executable/io/publish/.config.vsh.yaml index dc3e028..d800c71 100644 --- a/target/executable/io/publish/.config.vsh.yaml +++ b/target/executable/io/publish/.config.vsh.yaml @@ -100,6 +100,9 @@ resources: description: "Publish the processed results of the run" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -195,10 +198,10 @@ build_info: engine: "docker|native" output: "target/executable/io/publish" executable: "target/executable/io/publish/publish" - viash_version: "0.9.0" - git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4" + viash_version: "0.9.3" + git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0" git_remote: "https://github.com/viash-hub/demultiplex" - git_tag: "v0.1.1-21-g0c27ec1" + git_tag: "v0.1.1-22-g162497a" package_config: name: "demultiplex" version: "main" @@ -207,7 +210,7 @@ package_config: test_resources: - path: "gs://viash-hub-test-data/demultiplex/v2/" dest: "testData" - viash_version: "0.9.0" + viash_version: "0.9.3" source: "src" target: "target" config_mods: diff --git a/target/executable/io/publish/publish b/target/executable/io/publish/publish index 03ef618..540f79f 100755 --- a/target/executable/io/publish/publish +++ b/target/executable/io/publish/publish @@ -2,7 +2,7 @@ # publish main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,46 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "publish main" - echo "" - echo "Publish the processed results of the run" - echo "" - echo "Input arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " Directory to write fastq data to" - echo "" - echo " --input_falco" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " Directory to write falco output to" - echo "" - echo " --input_multiqc" - echo " type: file, required parameter, file must exist" - echo " Location where to write the MultiQC report to." - echo "" - echo " --input_run_information" - echo " type: file, required parameter, file must exist" - echo " Location where to write the run information to." - echo "" - echo "Output arguments:" - echo " --output" - echo " type: file, output, file must exist" - echo " default: fastq" - echo "" - echo " --output_falco" - echo " type: file, output, file must exist" - echo " default: qc/fastqc" - echo "" - echo " --output_multiqc" - echo " type: file, output, file must exist" - echo " default: qc/multiqc_report.html" - echo "" - echo " --output_run_information" - echo " type: file, output, file must exist" - echo " default: run_information.csv" -} # initialise variables VIASH_MODE='run' @@ -490,9 +450,9 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* LABEL org.opencontainers.image.description="Companion container for running component io publish" -LABEL org.opencontainers.image.created="2025-03-27T15:56:05Z" +LABEL org.opencontainers.image.created="2025-04-22T11:54:46Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex" -LABEL org.opencontainers.image.revision="0c27ec143671b3d9c985f2b5dc92f2a2065349d4" +LABEL org.opencontainers.image.revision="162497ab73faf321d5166fe34cd1f6976b14dcb0" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -607,6 +567,72 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "publish main" + echo "" + echo "Publish the processed results of the run" + echo "" + echo "Input arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " Directory to write fastq data to" + echo "" + echo " --input_falco" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " Directory to write falco output to" + echo "" + echo " --input_multiqc" + echo " type: file, required parameter, file must exist" + echo " Location where to write the MultiQC report to." + echo "" + echo " --input_run_information" + echo " type: file, required parameter, file must exist" + echo " Location where to write the run information to." + echo "" + echo "Output arguments:" + echo " --output" + echo " type: file, output, file must exist" + echo " default: fastq" + echo "" + echo " --output_falco" + echo " type: file, output, file must exist" + echo " default: qc/fastqc" + echo "" + echo " --output_multiqc" + echo " type: file, output, file must exist" + echo " default: qc/multiqc_report.html" + echo "" + echo " --output_run_information" + echo " type: file, output, file must exist" + echo " default: run_information.csv" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/executable/io/untar/.config.vsh.yaml b/target/executable/io/untar/.config.vsh.yaml index 312f474..c50aacd 100644 --- a/target/executable/io/untar/.config.vsh.yaml +++ b/target/executable/io/untar/.config.vsh.yaml @@ -57,6 +57,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -152,10 +155,10 @@ build_info: engine: "docker|native" output: "target/executable/io/untar" executable: "target/executable/io/untar/untar" - viash_version: "0.9.0" - git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4" + viash_version: "0.9.3" + git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0" git_remote: "https://github.com/viash-hub/demultiplex" - git_tag: "v0.1.1-21-g0c27ec1" + git_tag: "v0.1.1-22-g162497a" package_config: name: "demultiplex" version: "main" @@ -164,7 +167,7 @@ package_config: test_resources: - path: "gs://viash-hub-test-data/demultiplex/v2/" dest: "testData" - viash_version: "0.9.0" + viash_version: "0.9.3" source: "src" target: "target" config_mods: diff --git a/target/executable/io/untar/untar b/target/executable/io/untar/untar index 561e4cd..96c92de 100755 --- a/target/executable/io/untar/untar +++ b/target/executable/io/untar/untar @@ -2,7 +2,7 @@ # untar main # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -169,32 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "untar main" - echo "" - echo "Unpack a .tar file. When the contents of the .tar file is just a single" - echo "directory," - echo "put the contents of the directory into the output folder instead of that" - echo "directory." - echo "" - echo "Input arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " Tarball file to be unpacked." - echo "" - echo "Output arguments:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " Directory to write the contents of the .tar file to." - echo "" - echo "Other arguments:" - echo " -e, --exclude" - echo " type: string" - echo " example: docs/figures" - echo " Prevents any file or member whose name matches the shell wildcard" - echo " (pattern) from being extracted." -} # initialise variables VIASH_MODE='run' @@ -476,9 +450,9 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* LABEL org.opencontainers.image.description="Companion container for running component io untar" -LABEL org.opencontainers.image.created="2025-03-27T15:56:05Z" +LABEL org.opencontainers.image.created="2025-04-22T11:54:46Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex" -LABEL org.opencontainers.image.revision="0c27ec143671b3d9c985f2b5dc92f2a2065349d4" +LABEL org.opencontainers.image.revision="162497ab73faf321d5166fe34cd1f6976b14dcb0" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -593,6 +567,58 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "untar main" + echo "" + echo "Unpack a .tar file. When the contents of the .tar file is just a single" + echo "directory," + echo "put the contents of the directory into the output folder instead of that" + echo "directory." + echo "" + echo "Input arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " Tarball file to be unpacked." + echo "" + echo "Output arguments:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " Directory to write the contents of the .tar file to." + echo "" + echo "Other arguments:" + echo " -e, --exclude" + echo " type: string" + echo " example: docs/figures" + echo " Prevents any file or member whose name matches the shell wildcard" + echo " (pattern) from being extracted." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' diff --git a/target/nextflow/dataflow/combine_samples/.config.vsh.yaml b/target/nextflow/dataflow/combine_samples/.config.vsh.yaml index bc9be02..e6c41b8 100644 --- a/target/nextflow/dataflow/combine_samples/.config.vsh.yaml +++ b/target/nextflow/dataflow/combine_samples/.config.vsh.yaml @@ -80,6 +80,9 @@ description: "Combine fastq files from across samples into one event with a list \ fastq files per orientation." info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -161,10 +164,10 @@ build_info: engine: "native|native" output: "target/nextflow/dataflow/combine_samples" executable: "target/nextflow/dataflow/combine_samples/main.nf" - viash_version: "0.9.0" - git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4" + viash_version: "0.9.3" + git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0" git_remote: "https://github.com/viash-hub/demultiplex" - git_tag: "v0.1.1-21-g0c27ec1" + git_tag: "v0.1.1-22-g162497a" package_config: name: "demultiplex" version: "main" @@ -173,7 +176,7 @@ package_config: test_resources: - path: "gs://viash-hub-test-data/demultiplex/v2/" dest: "testData" - viash_version: "0.9.0" + viash_version: "0.9.3" source: "src" target: "target" config_mods: diff --git a/target/nextflow/dataflow/combine_samples/main.nf b/target/nextflow/dataflow/combine_samples/main.nf index 993ff98..8701195 100644 --- a/target/nextflow/dataflow/combine_samples/main.nf +++ b/target/nextflow/dataflow/combine_samples/main.nf @@ -1,6 +1,6 @@ // combine_samples main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2903,6 +3134,10 @@ meta = [ ], "description" : "Combine fastq files from across samples into one event with a list of fastq files per orientation.", "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -2999,10 +3234,10 @@ meta = [ "runner" : "nextflow", "engine" : "native|native", "output" : "target/nextflow/dataflow/combine_samples", - "viash_version" : "0.9.0", - "git_commit" : "0c27ec143671b3d9c985f2b5dc92f2a2065349d4", + "viash_version" : "0.9.3", + "git_commit" : "162497ab73faf321d5166fe34cd1f6976b14dcb0", "git_remote" : "https://github.com/viash-hub/demultiplex", - "git_tag" : "v0.1.1-21-g0c27ec1" + "git_tag" : "v0.1.1-22-g162497a" }, "package_config" : { "name" : "demultiplex", @@ -3016,7 +3251,7 @@ meta = [ } ] }, - "viash_version" : "0.9.0", + "viash_version" : "0.9.3", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/dataflow/gather_fastqs_and_validate/.config.vsh.yaml b/target/nextflow/dataflow/gather_fastqs_and_validate/.config.vsh.yaml index 6922e1e..b360073 100644 --- a/target/nextflow/dataflow/gather_fastqs_and_validate/.config.vsh.yaml +++ b/target/nextflow/dataflow/gather_fastqs_and_validate/.config.vsh.yaml @@ -56,6 +56,9 @@ description: "From a directory containing fastq files, gather the files per samp \ \nand validate according to the contents of the sample sheet.\n" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -137,10 +140,10 @@ build_info: engine: "native|native" output: "target/nextflow/dataflow/gather_fastqs_and_validate" executable: "target/nextflow/dataflow/gather_fastqs_and_validate/main.nf" - viash_version: "0.9.0" - git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4" + viash_version: "0.9.3" + git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0" git_remote: "https://github.com/viash-hub/demultiplex" - git_tag: "v0.1.1-21-g0c27ec1" + git_tag: "v0.1.1-22-g162497a" package_config: name: "demultiplex" version: "main" @@ -149,7 +152,7 @@ package_config: test_resources: - path: "gs://viash-hub-test-data/demultiplex/v2/" dest: "testData" - viash_version: "0.9.0" + viash_version: "0.9.3" source: "src" target: "target" config_mods: diff --git a/target/nextflow/dataflow/gather_fastqs_and_validate/main.nf b/target/nextflow/dataflow/gather_fastqs_and_validate/main.nf index 22f84a7..9048313 100644 --- a/target/nextflow/dataflow/gather_fastqs_and_validate/main.nf +++ b/target/nextflow/dataflow/gather_fastqs_and_validate/main.nf @@ -1,6 +1,6 @@ // gather_fastqs_and_validate main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2876,6 +3107,10 @@ meta = [ ], "description" : "From a directory containing fastq files, gather the files per sample \nand validate according to the contents of the sample sheet.\n", "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -2972,10 +3207,10 @@ meta = [ "runner" : "nextflow", "engine" : "native|native", "output" : "target/nextflow/dataflow/gather_fastqs_and_validate", - "viash_version" : "0.9.0", - "git_commit" : "0c27ec143671b3d9c985f2b5dc92f2a2065349d4", + "viash_version" : "0.9.3", + "git_commit" : "162497ab73faf321d5166fe34cd1f6976b14dcb0", "git_remote" : "https://github.com/viash-hub/demultiplex", - "git_tag" : "v0.1.1-21-g0c27ec1" + "git_tag" : "v0.1.1-22-g162497a" }, "package_config" : { "name" : "demultiplex", @@ -2989,7 +3224,7 @@ meta = [ } ] }, - "viash_version" : "0.9.0", + "viash_version" : "0.9.3", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/demultiplex/.config.vsh.yaml b/target/nextflow/demultiplex/.config.vsh.yaml index 26b4a45..6783845 100644 --- a/target/nextflow/demultiplex/.config.vsh.yaml +++ b/target/nextflow/demultiplex/.config.vsh.yaml @@ -124,6 +124,9 @@ test_resources: entrypoint: "test_bases2fastq" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -243,10 +246,10 @@ build_info: engine: "native|native" output: "target/nextflow/demultiplex" executable: "target/nextflow/demultiplex/main.nf" - viash_version: "0.9.0" - git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4" + viash_version: "0.9.3" + git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0" git_remote: "https://github.com/viash-hub/demultiplex" - git_tag: "v0.1.1-21-g0c27ec1" + git_tag: "v0.1.1-22-g162497a" dependencies: - "target/nextflow/io/untar" - "target/nextflow/dataflow/gather_fastqs_and_validate" @@ -264,7 +267,7 @@ package_config: test_resources: - path: "gs://viash-hub-test-data/demultiplex/v2/" dest: "testData" - viash_version: "0.9.0" + viash_version: "0.9.3" source: "src" target: "target" config_mods: diff --git a/target/nextflow/demultiplex/main.nf b/target/nextflow/demultiplex/main.nf index 96bbb3f..50fd048 100644 --- a/target/nextflow/demultiplex/main.nf +++ b/target/nextflow/demultiplex/main.nf @@ -1,6 +1,6 @@ // demultiplex main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2957,6 +3188,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3119,10 +3354,10 @@ meta = [ "runner" : "nextflow", "engine" : "native|native", "output" : "target/nextflow/demultiplex", - "viash_version" : "0.9.0", - "git_commit" : "0c27ec143671b3d9c985f2b5dc92f2a2065349d4", + "viash_version" : "0.9.3", + "git_commit" : "162497ab73faf321d5166fe34cd1f6976b14dcb0", "git_remote" : "https://github.com/viash-hub/demultiplex", - "git_tag" : "v0.1.1-21-g0c27ec1" + "git_tag" : "v0.1.1-22-g162497a" }, "package_config" : { "name" : "demultiplex", @@ -3136,7 +3371,7 @@ meta = [ } ] }, - "viash_version" : "0.9.0", + "viash_version" : "0.9.3", "source" : "src", "target" : "target", "config_mods" : [ diff --git a/target/nextflow/io/interop_summary_to_csv/.config.vsh.yaml b/target/nextflow/io/interop_summary_to_csv/.config.vsh.yaml index 7f86975..1a0c5a2 100644 --- a/target/nextflow/io/interop_summary_to_csv/.config.vsh.yaml +++ b/target/nextflow/io/interop_summary_to_csv/.config.vsh.yaml @@ -43,6 +43,9 @@ resources: dest: "nextflow_labels.config" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -145,10 +148,10 @@ build_info: engine: "docker|native" output: "target/nextflow/io/interop_summary_to_csv" executable: "target/nextflow/io/interop_summary_to_csv/main.nf" - viash_version: "0.9.0" - git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4" + viash_version: "0.9.3" + git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0" git_remote: "https://github.com/viash-hub/demultiplex" - git_tag: "v0.1.1-21-g0c27ec1" + git_tag: "v0.1.1-22-g162497a" package_config: name: "demultiplex" version: "main" @@ -157,7 +160,7 @@ package_config: test_resources: - path: "gs://viash-hub-test-data/demultiplex/v2/" dest: "testData" - viash_version: "0.9.0" + viash_version: "0.9.3" source: "src" target: "target" config_mods: diff --git a/target/nextflow/io/interop_summary_to_csv/main.nf b/target/nextflow/io/interop_summary_to_csv/main.nf index 3cbb9e7..d92c976 100644 --- a/target/nextflow/io/interop_summary_to_csv/main.nf +++ b/target/nextflow/io/interop_summary_to_csv/main.nf @@ -1,6 +1,6 @@ // interop_summary_to_csv main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2863,6 +3094,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -2984,10 +3219,10 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/io/interop_summary_to_csv", - "viash_version" : "0.9.0", - "git_commit" : "0c27ec143671b3d9c985f2b5dc92f2a2065349d4", + "viash_version" : "0.9.3", + "git_commit" : "162497ab73faf321d5166fe34cd1f6976b14dcb0", "git_remote" : "https://github.com/viash-hub/demultiplex", - "git_tag" : "v0.1.1-21-g0c27ec1" + "git_tag" : "v0.1.1-22-g162497a" }, "package_config" : { "name" : "demultiplex", @@ -3001,7 +3236,7 @@ meta = [ } ] }, - "viash_version" : "0.9.0", + "viash_version" : "0.9.3", "source" : "src", "target" : "target", "config_mods" : [ @@ -3404,7 +3639,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3418,6 +3653,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/io/publish/.config.vsh.yaml b/target/nextflow/io/publish/.config.vsh.yaml index 9753688..02031bb 100644 --- a/target/nextflow/io/publish/.config.vsh.yaml +++ b/target/nextflow/io/publish/.config.vsh.yaml @@ -100,6 +100,9 @@ resources: description: "Publish the processed results of the run" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -195,10 +198,10 @@ build_info: engine: "docker|native" output: "target/nextflow/io/publish" executable: "target/nextflow/io/publish/main.nf" - viash_version: "0.9.0" - git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4" + viash_version: "0.9.3" + git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0" git_remote: "https://github.com/viash-hub/demultiplex" - git_tag: "v0.1.1-21-g0c27ec1" + git_tag: "v0.1.1-22-g162497a" package_config: name: "demultiplex" version: "main" @@ -207,7 +210,7 @@ package_config: test_resources: - path: "gs://viash-hub-test-data/demultiplex/v2/" dest: "testData" - viash_version: "0.9.0" + viash_version: "0.9.3" source: "src" target: "target" config_mods: diff --git a/target/nextflow/io/publish/main.nf b/target/nextflow/io/publish/main.nf index 10eb3c2..7716a8e 100644 --- a/target/nextflow/io/publish/main.nf +++ b/target/nextflow/io/publish/main.nf @@ -1,6 +1,6 @@ // publish main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2929,6 +3160,10 @@ meta = [ ], "description" : "Publish the processed results of the run", "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3043,10 +3278,10 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/io/publish", - "viash_version" : "0.9.0", - "git_commit" : "0c27ec143671b3d9c985f2b5dc92f2a2065349d4", + "viash_version" : "0.9.3", + "git_commit" : "162497ab73faf321d5166fe34cd1f6976b14dcb0", "git_remote" : "https://github.com/viash-hub/demultiplex", - "git_tag" : "v0.1.1-21-g0c27ec1" + "git_tag" : "v0.1.1-22-g162497a" }, "package_config" : { "name" : "demultiplex", @@ -3060,7 +3295,7 @@ meta = [ } ] }, - "viash_version" : "0.9.0", + "viash_version" : "0.9.3", "source" : "src", "target" : "target", "config_mods" : [ @@ -3491,7 +3726,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3505,6 +3740,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/io/untar/.config.vsh.yaml b/target/nextflow/io/untar/.config.vsh.yaml index 76162ac..78885a2 100644 --- a/target/nextflow/io/untar/.config.vsh.yaml +++ b/target/nextflow/io/untar/.config.vsh.yaml @@ -57,6 +57,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -152,10 +155,10 @@ build_info: engine: "docker|native" output: "target/nextflow/io/untar" executable: "target/nextflow/io/untar/main.nf" - viash_version: "0.9.0" - git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4" + viash_version: "0.9.3" + git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0" git_remote: "https://github.com/viash-hub/demultiplex" - git_tag: "v0.1.1-21-g0c27ec1" + git_tag: "v0.1.1-22-g162497a" package_config: name: "demultiplex" version: "main" @@ -164,7 +167,7 @@ package_config: test_resources: - path: "gs://viash-hub-test-data/demultiplex/v2/" dest: "testData" - viash_version: "0.9.0" + viash_version: "0.9.3" source: "src" target: "target" config_mods: diff --git a/target/nextflow/io/untar/main.nf b/target/nextflow/io/untar/main.nf index 25554f3..7d6c819 100644 --- a/target/nextflow/io/untar/main.nf +++ b/target/nextflow/io/untar/main.nf @@ -1,6 +1,6 @@ // untar main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2882,6 +3113,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -2996,10 +3231,10 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/io/untar", - "viash_version" : "0.9.0", - "git_commit" : "0c27ec143671b3d9c985f2b5dc92f2a2065349d4", + "viash_version" : "0.9.3", + "git_commit" : "162497ab73faf321d5166fe34cd1f6976b14dcb0", "git_remote" : "https://github.com/viash-hub/demultiplex", - "git_tag" : "v0.1.1-21-g0c27ec1" + "git_tag" : "v0.1.1-22-g162497a" }, "package_config" : { "name" : "demultiplex", @@ -3013,7 +3248,7 @@ meta = [ } ] }, - "viash_version" : "0.9.0", + "viash_version" : "0.9.3", "source" : "src", "target" : "target", "config_mods" : [ @@ -3446,7 +3681,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3460,6 +3695,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/runner/.config.vsh.yaml b/target/nextflow/runner/.config.vsh.yaml index 7a362c6..2c08a0b 100644 --- a/target/nextflow/runner/.config.vsh.yaml +++ b/target/nextflow/runner/.config.vsh.yaml @@ -103,6 +103,9 @@ resources: description: "Runner for demultiplexing of raw sequencing data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -191,10 +194,10 @@ build_info: engine: "native|native" output: "target/nextflow/runner" executable: "target/nextflow/runner/main.nf" - viash_version: "0.9.0" - git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4" + viash_version: "0.9.3" + git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0" git_remote: "https://github.com/viash-hub/demultiplex" - git_tag: "v0.1.1-21-g0c27ec1" + git_tag: "v0.1.1-22-g162497a" dependencies: - "target/nextflow/demultiplex" - "target/nextflow/io/publish" @@ -206,7 +209,7 @@ package_config: test_resources: - path: "gs://viash-hub-test-data/demultiplex/v2/" dest: "testData" - viash_version: "0.9.0" + viash_version: "0.9.3" source: "src" target: "target" config_mods: diff --git a/target/nextflow/runner/main.nf b/target/nextflow/runner/main.nf index 73fe658..69c2604 100644 --- a/target/nextflow/runner/main.nf +++ b/target/nextflow/runner/main.nf @@ -1,6 +1,6 @@ // runner main // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1882,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1894,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1906,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] // output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2929,6 +3160,10 @@ meta = [ ], "description" : "Runner for demultiplexing of raw sequencing data", "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3039,10 +3274,10 @@ meta = [ "runner" : "nextflow", "engine" : "native|native", "output" : "target/nextflow/runner", - "viash_version" : "0.9.0", - "git_commit" : "0c27ec143671b3d9c985f2b5dc92f2a2065349d4", + "viash_version" : "0.9.3", + "git_commit" : "162497ab73faf321d5166fe34cd1f6976b14dcb0", "git_remote" : "https://github.com/viash-hub/demultiplex", - "git_tag" : "v0.1.1-21-g0c27ec1" + "git_tag" : "v0.1.1-22-g162497a" }, "package_config" : { "name" : "demultiplex", @@ -3056,7 +3291,7 @@ meta = [ } ] }, - "viash_version" : "0.9.0", + "viash_version" : "0.9.3", "source" : "src", "target" : "target", "config_mods" : [