Build branch main with version main (162497a)

Build pipeline: viash-hub.demultiplex.main-ss9kt

Source commit: 162497ab73

Source message: Bump viash to 0.9.3 (#43)

* Bump viash to 0.9.3

* Add CHANGELOG
This commit is contained in:
CI
2025-04-22 12:11:00 +00:00
parent 51fdf9e8bf
commit eb6fd9e999
22 changed files with 2468 additions and 646 deletions

View File

@@ -1,3 +1,9 @@
# demultiplex v0.3.9
## Minor changes
* Bump viash to 0.9.3 (PR #43).
# demultiplex v0.3.8
## Bug fixes

View File

@@ -11,7 +11,7 @@ info:
- path: gs://viash-hub-test-data/demultiplex/v2/
dest: testData
viash_version: 0.9.0
viash_version: 0.9.3
config_mods: |
.requirements.commands := ['ps']

View File

@@ -43,6 +43,9 @@ resources:
dest: "nextflow_labels.config"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -145,10 +148,10 @@ build_info:
engine: "docker|native"
output: "target/executable/io/interop_summary_to_csv"
executable: "target/executable/io/interop_summary_to_csv/interop_summary_to_csv"
viash_version: "0.9.0"
git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4"
viash_version: "0.9.3"
git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-21-g0c27ec1"
git_tag: "v0.1.1-22-g162497a"
package_config:
name: "demultiplex"
version: "main"
@@ -157,7 +160,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/demultiplex/v2/"
dest: "testData"
viash_version: "0.9.0"
viash_version: "0.9.3"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# interop_summary_to_csv main
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -169,22 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: print the usage text of this executable to stdout.
# Takes no arguments; every string below becomes one output line, in order.
function ViashHelp {
  printf '%s\n' \
    "interop_summary_to_csv main" \
    "" \
    "Input arguments:" \
    " --input" \
    " type: file, required parameter, file must exist" \
    " Sequencing run folder (*not* InterOp folder)." \
    "" \
    "Output arguments:" \
    " --output_run_summary" \
    " type: file, required parameter, output, file must exist" \
    "" \
    " --output_index_summary" \
    " type: file, required parameter, output, file must exist"
}
# initialise variables
VIASH_MODE='run'
@@ -470,9 +454,9 @@ tar -C /tmp/ --no-same-owner --no-same-permissions -xvf /tmp/interop.tar.gz && \
mv /tmp/interop-1.3.1-Linux-GNU/bin/index-summary /tmp/interop-1.3.1-Linux-GNU/bin/summary /usr/local/bin/
LABEL org.opencontainers.image.description="Companion container for running component io interop_summary_to_csv"
LABEL org.opencontainers.image.created="2025-03-27T15:56:06Z"
LABEL org.opencontainers.image.created="2025-04-22T11:54:46Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex"
LABEL org.opencontainers.image.revision="0c27ec143671b3d9c985f2b5dc92f2a2065349d4"
LABEL org.opencontainers.image.revision="162497ab73faf321d5166fe34cd1f6976b14dcb0"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER
@@ -587,6 +571,48 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
echo "interop_summary_to_csv main"
echo ""
echo "Input arguments:"
echo " --input"
echo " type: file, required parameter, file must exist"
echo " Sequencing run folder (*not* InterOp folder)."
echo ""
echo "Output arguments:"
echo " --output_run_summary"
echo " type: file, required parameter, output, file must exist"
echo ""
echo " --output_index_summary"
echo " type: file, required parameter, output, file must exist"
echo ""
echo "Viash built in Computational Requirements:"
echo " ---cpus=INT"
echo " Number of CPUs to use"
echo " ---memory=STRING"
echo " Amount of memory to use. Examples: 4GB, 3MiB."
echo ""
echo "Viash built in Docker:"
echo " ---setup=STRATEGY"
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
echo " Default: ifneedbepullelsecachedbuild"
echo " ---dockerfile"
echo " Print the dockerfile to stdout."
echo " ---docker_run_args=ARG"
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
echo " ---docker_image_id"
echo " Print the docker image id to stdout."
echo " ---debug"
echo " Enter the docker container for debugging purposes."
echo ""
echo "Viash built in Engines:"
echo " ---engine=ENGINE_ID"
echo " Specify the engine to use. Options are: docker, native."
echo " Default: docker"
}
# initialise array
VIASH_POSITIONAL_ARGS=''

View File

@@ -100,6 +100,9 @@ resources:
description: "Publish the processed results of the run"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -195,10 +198,10 @@ build_info:
engine: "docker|native"
output: "target/executable/io/publish"
executable: "target/executable/io/publish/publish"
viash_version: "0.9.0"
git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4"
viash_version: "0.9.3"
git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-21-g0c27ec1"
git_tag: "v0.1.1-22-g162497a"
package_config:
name: "demultiplex"
version: "main"
@@ -207,7 +210,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/demultiplex/v2/"
dest: "testData"
viash_version: "0.9.0"
viash_version: "0.9.3"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# publish main
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -169,46 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: print the usage text of this executable to stdout.
# Takes no arguments; every string below becomes one output line, in order.
function ViashHelp {
  printf '%s\n' \
    "publish main" \
    "" \
    "Publish the processed results of the run" \
    "" \
    "Input arguments:" \
    " --input" \
    " type: file, required parameter, file must exist" \
    " Directory to write fastq data to" \
    "" \
    " --input_falco" \
    " type: file, required parameter, multiple values allowed, file must exist" \
    " Directory to write falco output to" \
    "" \
    " --input_multiqc" \
    " type: file, required parameter, file must exist" \
    " Location where to write the MultiQC report to." \
    "" \
    " --input_run_information" \
    " type: file, required parameter, file must exist" \
    " Location where to write the run information to." \
    "" \
    "Output arguments:" \
    " --output" \
    " type: file, output, file must exist" \
    " default: fastq" \
    "" \
    " --output_falco" \
    " type: file, output, file must exist" \
    " default: qc/fastqc" \
    "" \
    " --output_multiqc" \
    " type: file, output, file must exist" \
    " default: qc/multiqc_report.html" \
    "" \
    " --output_run_information" \
    " type: file, output, file must exist" \
    " default: run_information.csv"
}
# initialise variables
VIASH_MODE='run'
@@ -490,9 +450,9 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/*
LABEL org.opencontainers.image.description="Companion container for running component io publish"
LABEL org.opencontainers.image.created="2025-03-27T15:56:05Z"
LABEL org.opencontainers.image.created="2025-04-22T11:54:46Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex"
LABEL org.opencontainers.image.revision="0c27ec143671b3d9c985f2b5dc92f2a2065349d4"
LABEL org.opencontainers.image.revision="162497ab73faf321d5166fe34cd1f6976b14dcb0"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER
@@ -607,6 +567,72 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
echo "publish main"
echo ""
echo "Publish the processed results of the run"
echo ""
echo "Input arguments:"
echo " --input"
echo " type: file, required parameter, file must exist"
echo " Directory to write fastq data to"
echo ""
echo " --input_falco"
echo " type: file, required parameter, multiple values allowed, file must exist"
echo " Directory to write falco output to"
echo ""
echo " --input_multiqc"
echo " type: file, required parameter, file must exist"
echo " Location where to write the MultiQC report to."
echo ""
echo " --input_run_information"
echo " type: file, required parameter, file must exist"
echo " Location where to write the run information to."
echo ""
echo "Output arguments:"
echo " --output"
echo " type: file, output, file must exist"
echo " default: fastq"
echo ""
echo " --output_falco"
echo " type: file, output, file must exist"
echo " default: qc/fastqc"
echo ""
echo " --output_multiqc"
echo " type: file, output, file must exist"
echo " default: qc/multiqc_report.html"
echo ""
echo " --output_run_information"
echo " type: file, output, file must exist"
echo " default: run_information.csv"
echo ""
echo "Viash built in Computational Requirements:"
echo " ---cpus=INT"
echo " Number of CPUs to use"
echo " ---memory=STRING"
echo " Amount of memory to use. Examples: 4GB, 3MiB."
echo ""
echo "Viash built in Docker:"
echo " ---setup=STRATEGY"
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
echo " Default: ifneedbepullelsecachedbuild"
echo " ---dockerfile"
echo " Print the dockerfile to stdout."
echo " ---docker_run_args=ARG"
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
echo " ---docker_image_id"
echo " Print the docker image id to stdout."
echo " ---debug"
echo " Enter the docker container for debugging purposes."
echo ""
echo "Viash built in Engines:"
echo " ---engine=ENGINE_ID"
echo " Specify the engine to use. Options are: docker, native."
echo " Default: docker"
}
# initialise array
VIASH_POSITIONAL_ARGS=''

View File

@@ -57,6 +57,9 @@ test_resources:
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -152,10 +155,10 @@ build_info:
engine: "docker|native"
output: "target/executable/io/untar"
executable: "target/executable/io/untar/untar"
viash_version: "0.9.0"
git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4"
viash_version: "0.9.3"
git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-21-g0c27ec1"
git_tag: "v0.1.1-22-g162497a"
package_config:
name: "demultiplex"
version: "main"
@@ -164,7 +167,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/demultiplex/v2/"
dest: "testData"
viash_version: "0.9.0"
viash_version: "0.9.3"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# untar main
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -169,32 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: print the usage text of this executable to stdout.
# Takes no arguments; every string below becomes one output line, in order.
function ViashHelp {
  printf '%s\n' \
    "untar main" \
    "" \
    "Unpack a .tar file. When the contents of the .tar file is just a single" \
    "directory," \
    "put the contents of the directory into the output folder instead of that" \
    "directory." \
    "" \
    "Input arguments:" \
    " --input" \
    " type: file, required parameter, file must exist" \
    " Tarball file to be unpacked." \
    "" \
    "Output arguments:" \
    " --output" \
    " type: file, required parameter, output, file must exist" \
    " Directory to write the contents of the .tar file to." \
    "" \
    "Other arguments:" \
    " -e, --exclude" \
    " type: string" \
    " example: docs/figures" \
    " Prevents any file or member whose name matches the shell wildcard" \
    " (pattern) from being extracted."
}
# initialise variables
VIASH_MODE='run'
@@ -476,9 +450,9 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/*
LABEL org.opencontainers.image.description="Companion container for running component io untar"
LABEL org.opencontainers.image.created="2025-03-27T15:56:05Z"
LABEL org.opencontainers.image.created="2025-04-22T11:54:46Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex"
LABEL org.opencontainers.image.revision="0c27ec143671b3d9c985f2b5dc92f2a2065349d4"
LABEL org.opencontainers.image.revision="162497ab73faf321d5166fe34cd1f6976b14dcb0"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER
@@ -593,6 +567,58 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
echo "untar main"
echo ""
echo "Unpack a .tar file. When the contents of the .tar file is just a single"
echo "directory,"
echo "put the contents of the directory into the output folder instead of that"
echo "directory."
echo ""
echo "Input arguments:"
echo " --input"
echo " type: file, required parameter, file must exist"
echo " Tarball file to be unpacked."
echo ""
echo "Output arguments:"
echo " --output"
echo " type: file, required parameter, output, file must exist"
echo " Directory to write the contents of the .tar file to."
echo ""
echo "Other arguments:"
echo " -e, --exclude"
echo " type: string"
echo " example: docs/figures"
echo " Prevents any file or member whose name matches the shell wildcard"
echo " (pattern) from being extracted."
echo ""
echo "Viash built in Computational Requirements:"
echo " ---cpus=INT"
echo " Number of CPUs to use"
echo " ---memory=STRING"
echo " Amount of memory to use. Examples: 4GB, 3MiB."
echo ""
echo "Viash built in Docker:"
echo " ---setup=STRATEGY"
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
echo " Default: ifneedbepullelsecachedbuild"
echo " ---dockerfile"
echo " Print the dockerfile to stdout."
echo " ---docker_run_args=ARG"
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
echo " ---docker_image_id"
echo " Print the docker image id to stdout."
echo " ---debug"
echo " Enter the docker container for debugging purposes."
echo ""
echo "Viash built in Engines:"
echo " ---engine=ENGINE_ID"
echo " Specify the engine to use. Options are: docker, native."
echo " Default: docker"
}
# initialise array
VIASH_POSITIONAL_ARGS=''

View File

@@ -80,6 +80,9 @@ description: "Combine fastq files from across samples into one event with a list
\ fastq files per orientation."
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -161,10 +164,10 @@ build_info:
engine: "native|native"
output: "target/nextflow/dataflow/combine_samples"
executable: "target/nextflow/dataflow/combine_samples/main.nf"
viash_version: "0.9.0"
git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4"
viash_version: "0.9.3"
git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-21-g0c27ec1"
git_tag: "v0.1.1-22-g162497a"
package_config:
name: "demultiplex"
version: "main"
@@ -173,7 +176,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/demultiplex/v2/"
dest: "testData"
viash_version: "0.9.0"
viash_version: "0.9.3"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// combine_samples main
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Asserts that every required output argument declared in the config has a
// non-null entry in `outputs`.
//   outputs: map from plain argument name to the emitted value
//   config:  component config; only `allArguments` is read
//   id, key: only used to build the assertion message
// The check is skipped entirely when `workflow.stubRun` is set.
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (workflow.stubRun) {
    return
  }

  def requiredOutputs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (arg in requiredOutputs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Builds a sub-workflow that passes the files found in each event's state to
// publishFilesProc.
//   args.key: mandatory; "<id>.<key>" is the prefix given to collectInputOutputPaths
// The returned workflow emits its input channel unchanged.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def eventId_ = tup[0]
          def eventState_ = tup[1]

          // after transposing, element 0 holds the input files and element 1
          // the target output filenames
          def pathColumns_ = collectInputOutputPaths(eventState_, eventId_ + "." + key_).transpose()

          [eventId_, pathColumns_[0], pathColumns_[1]]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Copies staged input files to their target filenames inside the task
// directory; publishDir then copies the declared outputs to the publish dir.
//   input:  [id, inputFiles (staged under _inputfile?/), outputFiles]
//   output: [id, the paths named in outputFiles]
process publishFilesProc {
  // todo: check publishpath?
  publishDir path: "${getPublishDir()}/", mode: "copy"
  tag "$id"
  input:
    tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
  output:
    tuple val(id), path{outputFiles}
  script:
  // a single file arrives as a bare value rather than a list, so normalise both sides
  def sources = inputFiles instanceof List ? inputFiles : [inputFiles]
  def targets = outputFiles instanceof List ? outputFiles : [outputFiles]

  def copyCommands = []
  [sources, targets].transpose().each { infile, outfile ->
    // a file that already sits at its target name needs no copy
    if (infile.toString() != outfile.toString()) {
      // create the parent directory first, then copy recursively
      copyCommands << "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\""
      copyCommands << "cp -r '${infile.toString()}' '${outfile.toString()}'"
    }
  }
  """
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// Builds a sub-workflow that publishes the output files of a component, using
// the component config to decide which state entries are files and how the
// published files are named.
//   args.config: mandatory component config (`allArguments` and `name` are read)
//   args.key:    optional, defaults to config.name; substituted for $key / ${key}
// Input events are [id, state, origState] tuples; the returned workflow emits
// its input channel unchanged.
// This assumes that the state contains no other values than those specified in the config.
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of maps with the entries
          // - inputPath: a List[Path] of files to copy
          // - outputFilename: a List[String] of destination filenames
          // such that inputPath[i] is copied to outputFilename[i]
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's either an
                // optional argument for which the component did not
                // generate any output, OR multiple channels were emitted
                // and this output was not emitted on the channel that is
                // currently being processed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // a parameter that is not a file has nothing that needs
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return []
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  // turn the list of per-file maps into one map of lists
                  def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
                    [key, outputPerFile.collect{dic -> dic[key]}]
                  }
                  return [[key: plainName_] + transposedOutputs]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1749,33 +1906,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2559,6 +2692,7 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted using groupTuple's 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2755,14 +2988,14 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
@@ -2770,8 +3003,6 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2903,6 +3134,10 @@ meta = [
],
"description" : "Combine fastq files from across samples into one event with a list of fastq files per orientation.",
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -2999,10 +3234,10 @@ meta = [
"runner" : "nextflow",
"engine" : "native|native",
"output" : "target/nextflow/dataflow/combine_samples",
"viash_version" : "0.9.0",
"git_commit" : "0c27ec143671b3d9c985f2b5dc92f2a2065349d4",
"viash_version" : "0.9.3",
"git_commit" : "162497ab73faf321d5166fe34cd1f6976b14dcb0",
"git_remote" : "https://github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-21-g0c27ec1"
"git_tag" : "v0.1.1-22-g162497a"
},
"package_config" : {
"name" : "demultiplex",
@@ -3016,7 +3251,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.3",
"source" : "src",
"target" : "target",
"config_mods" : [

View File

@@ -56,6 +56,9 @@ description: "From a directory containing fastq files, gather the files per samp
\ \nand validate according to the contents of the sample sheet.\n"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -137,10 +140,10 @@ build_info:
engine: "native|native"
output: "target/nextflow/dataflow/gather_fastqs_and_validate"
executable: "target/nextflow/dataflow/gather_fastqs_and_validate/main.nf"
viash_version: "0.9.0"
git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4"
viash_version: "0.9.3"
git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-21-g0c27ec1"
git_tag: "v0.1.1-22-g162497a"
package_config:
name: "demultiplex"
version: "main"
@@ -149,7 +152,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/demultiplex/v2/"
dest: "testData"
viash_version: "0.9.0"
viash_version: "0.9.3"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// gather_fastqs_and_validate main
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Asserts that every argument in `config.allArguments` that is both an output
// and required has a non-null entry in `outputs`.
//
//   outputs - map of output argument name -> value, as emitted by the module
//   config  - component config; only `config.allArguments` is read
//   id      - event id, used in the error message only
//   key     - module key, used in the error message only
//
// The check is skipped entirely when the workflow is executed as a stub run.
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (workflow.stubRun) {
    return
  }
  def requiredOutputs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (arg in requiredOutputs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Builds a workflow that hands the files referenced in each event's state to
// publishFilesProc.
//
//   args.key - required; combined with the event id as "<id>.<key>" and passed
//              to collectInputOutputPaths as its second argument
//
// The returned workflow takes a channel whose events start with [id, state]
// and emits that same input channel unchanged.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def eventId = tup[0]
          // transpose() regroups the collected pairs into two parallel lists:
          // the input files first, the target output filenames second
          def pathPairs = collectInputOutputPaths(tup[1], eventId + "." + key_).transpose()
          [eventId, pathPairs[0], pathPairs[1]]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Nextflow process that copies each staged input file to its requested output
// filename inside the task directory and declares those filenames as outputs,
// so that the publishDir directive below can publish them.
//
// Input tuple:  [id, inputFiles, outputFiles]
//   - inputFiles and outputFiles may each be a single value or a List; they are
//     paired element-wise (first input with first output filename, and so on).
// Output tuple: [id, <paths named by outputFiles>]
process publishFilesProc {
// todo: check publishpath?
// getPublishDir() is defined outside this block; presumably it resolves the
// configured publish directory -- confirm against its definition.
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
// NOTE(review): the "_inputfile?/*" stage pattern appears intended to place
// every input in its own numbered directory so equally named files do not
// collide -- confirm against the Nextflow `stageAs` documentation.
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Normalise both sides to lists, pair them up, and generate the shell commands
// for every pair whose source and destination differ.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
// create the destination's parent directory if it does not exist yet,
// then copy recursively (cp -r also handles directory inputs)
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
// The generated commands are joined with newlines and embedded in the script.
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// Builds a workflow that publishes the output files of a component. The
// component config decides which state entries are files; the original
// (requested) state provides the filename template for each of them.
//
//   args.config - required; `config.allArguments` is scanned for arguments with
//                 direction "output"
//   args.key    - value substituted for '$key' / '${key}' in the filename
//                 templates; defaults to `config.name`
//
// The returned workflow takes a channel of [id, state, origState] tuples, pipes
// [id, inputPaths, outputFilenames] into publishFilesProc and emits the input
// channel unchanged.
//
// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of [inputPath, outputFilename] maps, where
          // - inputPath is a list of the files to copy
          // - outputFilename is a List[String] with the destination of each file
          // - (inputPath, outputFilename) are paired element-wise: the files that
          //   will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's an
                // optional argument for which the component did
                // not generate any output OR multiple channels were emitted
                // and the output was just not added to using the channel
                // that is now being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, it is not something that
                // needs to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  // same entry shape as the other branches: two parallel lists
                  return [[
                    inputPath: outputPerFile.collect{ entry -> entry.inputPath },
                    outputFilename: outputPerFile.collect{ entry -> entry.outputFilename }
                  ]]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          // flatten the per-argument lists into the two lists expected by publishFilesProc
          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1749,33 +1906,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2559,6 +2692,7 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel, also covering the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted, for which groupTuple has a 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2755,14 +2988,14 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
@@ -2770,8 +3003,6 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2876,6 +3107,10 @@ meta = [
],
"description" : "From a directory containing fastq files, gather the files per sample \nand validate according to the contents of the sample sheet.\n",
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -2972,10 +3207,10 @@ meta = [
"runner" : "nextflow",
"engine" : "native|native",
"output" : "target/nextflow/dataflow/gather_fastqs_and_validate",
"viash_version" : "0.9.0",
"git_commit" : "0c27ec143671b3d9c985f2b5dc92f2a2065349d4",
"viash_version" : "0.9.3",
"git_commit" : "162497ab73faf321d5166fe34cd1f6976b14dcb0",
"git_remote" : "https://github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-21-g0c27ec1"
"git_tag" : "v0.1.1-22-g162497a"
},
"package_config" : {
"name" : "demultiplex",
@@ -2989,7 +3224,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.3",
"source" : "src",
"target" : "target",
"config_mods" : [

View File

@@ -124,6 +124,9 @@ test_resources:
entrypoint: "test_bases2fastq"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -243,10 +246,10 @@ build_info:
engine: "native|native"
output: "target/nextflow/demultiplex"
executable: "target/nextflow/demultiplex/main.nf"
viash_version: "0.9.0"
git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4"
viash_version: "0.9.3"
git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-21-g0c27ec1"
git_tag: "v0.1.1-22-g162497a"
dependencies:
- "target/nextflow/io/untar"
- "target/nextflow/dataflow/gather_fastqs_and_validate"
@@ -264,7 +267,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/demultiplex/v2/"
dest: "testData"
viash_version: "0.9.0"
viash_version: "0.9.3"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// demultiplex main
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Asserts that every argument in `config.allArguments` that is both an output
// and required has a non-null entry in `outputs`.
//
//   outputs - map of output argument name -> value, as emitted by the module
//   config  - component config; only `config.allArguments` is read
//   id      - event id, used in the error message only
//   key     - module key, used in the error message only
//
// The check is skipped entirely when the workflow is executed as a stub run.
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (workflow.stubRun) {
    return
  }
  def requiredOutputs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (arg in requiredOutputs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Builds a workflow that hands the files referenced in each event's state to
// publishFilesProc.
//
//   args.key - required; combined with the event id as "<id>.<key>" and passed
//              to collectInputOutputPaths as its second argument
//
// The returned workflow takes a channel whose events start with [id, state]
// and emits that same input channel unchanged.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def eventId = tup[0]
          // transpose() regroups the collected pairs into two parallel lists:
          // the input files first, the target output filenames second
          def pathPairs = collectInputOutputPaths(tup[1], eventId + "." + key_).transpose()
          [eventId, pathPairs[0], pathPairs[1]]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Nextflow process that copies each staged input file to its requested output
// filename inside the task directory and declares those filenames as outputs,
// so that the publishDir directive below can publish them.
//
// Input tuple:  [id, inputFiles, outputFiles]
//   - inputFiles and outputFiles may each be a single value or a List; they are
//     paired element-wise (first input with first output filename, and so on).
// Output tuple: [id, <paths named by outputFiles>]
process publishFilesProc {
// todo: check publishpath?
// getPublishDir() is defined outside this block; presumably it resolves the
// configured publish directory -- confirm against its definition.
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
// NOTE(review): the "_inputfile?/*" stage pattern appears intended to place
// every input in its own numbered directory so equally named files do not
// collide -- confirm against the Nextflow `stageAs` documentation.
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Normalise both sides to lists, pair them up, and generate the shell commands
// for every pair whose source and destination differ.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
// create the destination's parent directory if it does not exist yet,
// then copy recursively (cp -r also handles directory inputs)
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
// The generated commands are joined with newlines and embedded in the script.
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// Builds a workflow that publishes the output files of a component. The
// component config decides which state entries are files; the original
// (requested) state provides the filename template for each of them.
//
//   args.config - required; `config.allArguments` is scanned for arguments with
//                 direction "output"
//   args.key    - value substituted for '$key' / '${key}' in the filename
//                 templates; defaults to `config.name`
//
// The returned workflow takes a channel of [id, state, origState] tuples, pipes
// [id, inputPaths, outputFilenames] into publishFilesProc and emits the input
// channel unchanged.
//
// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of [inputPath, outputFilename] maps, where
          // - inputPath is a list of the files to copy
          // - outputFilename is a List[String] with the destination of each file
          // - (inputPath, outputFilename) are paired element-wise: the files that
          //   will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's an
                // optional argument for which the component did
                // not generate any output OR multiple channels were emitted
                // and the output was just not added to using the channel
                // that is now being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, it is not something that
                // needs to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  // same entry shape as the other branches: two parallel lists
                  return [[
                    inputPath: outputPerFile.collect{ entry -> entry.inputPath },
                    outputFilename: outputPerFile.collect{ entry -> entry.outputFilename }
                  ]]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          // flatten the per-argument lists into the two lists expected by publishFilesProc
          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1749,33 +1906,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2559,6 +2692,7 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted, for which groupTuple has a 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2755,14 +2988,14 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
@@ -2770,8 +3003,6 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2957,6 +3188,10 @@ meta = [
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3119,10 +3354,10 @@ meta = [
"runner" : "nextflow",
"engine" : "native|native",
"output" : "target/nextflow/demultiplex",
"viash_version" : "0.9.0",
"git_commit" : "0c27ec143671b3d9c985f2b5dc92f2a2065349d4",
"viash_version" : "0.9.3",
"git_commit" : "162497ab73faf321d5166fe34cd1f6976b14dcb0",
"git_remote" : "https://github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-21-g0c27ec1"
"git_tag" : "v0.1.1-22-g162497a"
},
"package_config" : {
"name" : "demultiplex",
@@ -3136,7 +3371,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.3",
"source" : "src",
"target" : "target",
"config_mods" : [

View File

@@ -43,6 +43,9 @@ resources:
dest: "nextflow_labels.config"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -145,10 +148,10 @@ build_info:
engine: "docker|native"
output: "target/nextflow/io/interop_summary_to_csv"
executable: "target/nextflow/io/interop_summary_to_csv/main.nf"
viash_version: "0.9.0"
git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4"
viash_version: "0.9.3"
git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-21-g0c27ec1"
git_tag: "v0.1.1-22-g162497a"
package_config:
name: "demultiplex"
version: "main"
@@ -157,7 +160,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/demultiplex/v2/"
dest: "testData"
viash_version: "0.9.0"
viash_version: "0.9.3"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// interop_summary_to_csv main
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
/**
 * Assert that every required output argument declared in the component
 * config is present (and non-null) in the given outputs map.
 *
 * The check is skipped entirely when the workflow is executed as a stub run.
 *
 * @param outputs map of output argument name to value
 * @param config  component config; only `allArguments` is read
 * @param id      event id, used in the error message
 * @param key     module key, used in the error message
 */
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  // nothing is verified during a stub run
  if (workflow.stubRun) {
    return
  }

  // narrow down to the arguments that must be present in the outputs
  def requiredOutputArgs = config.allArguments.findAll { candidate ->
    candidate.direction == "output" && candidate.required
  }

  for (arg in requiredOutputArgs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Create a sub-workflow that publishes the files referenced by the state of each event.
//
// Arguments (passed as a Map):
//   - key: required; combined with the event id as "<id>.<key>" and handed to
//     collectInputOutputPaths() when deriving the output filenames.
//
// The returned workflow expects events whose first two elements are [id, state],
// forwards [id, inputFiles, outputFilenames] to publishFilesProc, and emits the
// original input channel unchanged.
def publishFiles(Map args) {
def key_ = args.get("key")
assert key_ != null : "publishFiles: key must be specified"
workflow publishFilesWf {
take: input_ch
main:
input_ch
| map { tup ->
def id_ = tup[0]
def state_ = tup[1]
// the input files and the target output filenames
// NOTE(review): collectInputOutputPaths is defined elsewhere; presumably it returns a
// list of [inputPath, outputFilename] pairs, which transpose() turns into two parallel lists.
// If no pairs are returned, both lookups below yield null -- confirm publishFilesProc copes with that.
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
[id_, inputFiles_, outputFilenames_]
}
| publishFilesProc
// the untouched input channel is emitted, not the output of publishFilesProc
emit: input_ch
}
return publishFilesWf
}
// Process that copies a list of staged input files to their target output
// filenames, after which publishDir copies them to the publish directory
// returned by getPublishDir().
//
// Input:  [id, inputFiles, outputFiles]; inputFiles and outputFiles are paired up by position.
// Output: [id, <the files named in outputFiles>]
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
// NOTE(review): the "_inputfile?/*" staging pattern presumably places every input file in
// its own numbered directory so that equally named inputs do not collide -- confirm.
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// A single file/filename may arrive as a scalar rather than a List, so both sides
// are wrapped before being paired up with transpose().
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
// two shell commands per pair: create the parent directory of the
// destination if it does not exist yet, then copy recursively
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// this assumes that the state contains no other values other than those specified in the config
//
// Create a sub-workflow that publishes the output files of a component, using the
// component config to decide which state entries are files and the original
// (user-provided) state to decide what the published files should be called.
//
// Arguments (passed as a Map):
//   - config: required; the component config, of which `allArguments` and `name` are read.
//   - key: optional; defaults to config.name. Substituted for `$key` / `${key}` in filename templates.
//
// The returned workflow expects events of the form [id, state, origState],
// forwards [id, inputPaths, outputFilenames] to publishFilesProc, and emits the
// original input channel unchanged.
def publishFilesByConfig(Map args) {
def config = args.get("config")
assert config != null : "publishFilesByConfig: config must be specified"
// NOTE(review): Groovy's two-argument Map.get(key, default) also stores the default
// in `args` when the key is absent, so this line may mutate the caller's map.
def key_ = args.get("key", config.name)
assert key_ != null : "publishFilesByConfig: key must be specified"
workflow publishFilesSimpleWf {
take: input_ch
main:
input_ch
| map { tup ->
def id_ = tup[0]
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
// the processed state is a list of maps with the entries inputPath and outputFilename, where
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (inputPath, outputFilename) are the files that will be copied from src to dest
// (the map returned for `multiple` arguments additionally carries a `key` entry,
// which is not read further down in this closure)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
.collectMany { par ->
def plainName_ = par.plainName
// if the state does not contain the key, it's either an
// optional argument for which the component did
// not generate any output, OR multiple channels were emitted
// and this output was simply not part of the event emitted
// by the channel that is now being processed
if (!state_.containsKey(plainName_)) {
return []
}
def value = state_[plainName_]
// if the parameter is not a file, there is nothing
// to be copied from the source path to the dest path,
// so it contributes no input paths or output filenames
if (par.type != "file") {
return [[inputPath: [], outputFilename: []]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
// that it should not be returned as a state
if (!origState_.containsKey(plainName_)) {
return []
}
def filenameTemplate = origState_[plainName_]
// if the parameter is multiple: true, the template may be wrapped in a list; use its first element
if (par.multiple && filenameTemplate instanceof List) {
filenameTemplate = filenameTemplate[0]
}
// instantiate the template
// NOTE(review): id_ and key_ are used as regex replacement strings, so a '$' or '\'
// inside them is interpreted by replaceAll rather than inserted literally.
def filename = filenameTemplate
.replaceAll('\\$id', id_)
.replaceAll('\\$\\{id\\}', id_)
.replaceAll('\\$key', key_)
.replaceAll('\\$\\{key\\}', key_)
if (par.multiple) {
// if the parameter is multiple: true, the filename
// should contain a wildcard '*' that is replaced with
// the index of the file
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
def outputPerFile = value.withIndex().collect{ val, ix ->
def filename_ix = filename.replace("*", ix.toString())
def inputPath = val instanceof File ? val.toPath() : val
[inputPath: inputPath, outputFilename: filename_ix]
}
// turn the list of per-file maps into one map of parallel lists
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
} else {
// NOTE(review): value_ is not used in the remainder of this branch
def value_ = java.nio.file.Paths.get(filename)
def inputPath = value instanceof File ? value.toPath() : value
return [[inputPath: [inputPath], outputFilename: [filename]]]
}
}
// flatten the per-argument lists into one list of sources and one list of destinations
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
[id_, inputPaths, outputFilenames]
}
| publishFilesProc
// the untouched input channel is emitted, not the output of publishFilesProc
emit: input_ch
}
return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1749,33 +1906,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2559,6 +2692,7 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted, for which groupTuple has a 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2755,14 +2988,14 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
@@ -2770,8 +3003,6 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2863,6 +3094,10 @@ meta = [
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -2984,10 +3219,10 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/io/interop_summary_to_csv",
"viash_version" : "0.9.0",
"git_commit" : "0c27ec143671b3d9c985f2b5dc92f2a2065349d4",
"viash_version" : "0.9.3",
"git_commit" : "162497ab73faf321d5166fe34cd1f6976b14dcb0",
"git_remote" : "https://github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-21-g0c27ec1"
"git_tag" : "v0.1.1-22-g162497a"
},
"package_config" : {
"name" : "demultiplex",
@@ -3001,7 +3236,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.3",
"source" : "src",
"target" : "target",
"config_mods" : [
@@ -3404,7 +3639,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
// create process from temp file
def binding = new nextflow.script.ScriptBinding([:])
def session = nextflow.Nextflow.getSession()
def parser = new nextflow.script.ScriptParser(session)
def parser = _getScriptLoader(session)
.setModule(true)
.setBinding(binding)
def moduleScript = parser.runScript(tempFile)
@@ -3418,6 +3653,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
return scriptMeta.getProcess(procKey)
}
// use Reflection to get a ScriptParser / ScriptLoader
// <25.02.0-edge: new nextflow.script.ScriptParser(session)
// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session)
def _getScriptLoader(nextflow.Session session) {
  // Step 1: look for the legacy parser class; a missing class is not an error here.
  Class<?> legacyParserClass
  try {
    legacyParserClass = Class.forName('nextflow.script.ScriptParser')
  } catch (ClassNotFoundException e) {
    legacyParserClass = null
  }

  // Step 2: the legacy class exists, so instantiate it through its (Session) constructor.
  if (legacyParserClass != null) {
    return legacyParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session)
  }

  // Step 3: fall back to the static factory method ScriptLoaderFactory.create(session).
  try {
    Class<?> loaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory')
    def factoryMethod = loaderFactoryClass.getDeclaredMethod('create', nextflow.Session)
    // the receiver is null because 'create' is static
    return factoryMethod.invoke(null, session)
  } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) {
    // neither entry point was usable; keep the underlying failure as the cause
    throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2)
  }
}
// defaults
meta["defaults"] = [
// key to be used to trace the process and determine output names

View File

@@ -100,6 +100,9 @@ resources:
description: "Publish the processed results of the run"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -195,10 +198,10 @@ build_info:
engine: "docker|native"
output: "target/nextflow/io/publish"
executable: "target/nextflow/io/publish/main.nf"
viash_version: "0.9.0"
git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4"
viash_version: "0.9.3"
git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-21-g0c27ec1"
git_tag: "v0.1.1-22-g162497a"
package_config:
name: "demultiplex"
version: "main"
@@ -207,7 +210,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/demultiplex/v2/"
dest: "testData"
viash_version: "0.9.0"
viash_version: "0.9.3"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// publish main
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Assert that every required output argument declared in the config has a
// non-null entry in `outputs`. `id` and `key` are only used in the error message.
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  // nothing is validated during a stub run
  if (workflow.stubRun) {
    return
  }

  def requiredOutputArgs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (arg in requiredOutputArgs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Build a sub-workflow that publishes the files referenced by each event's state.
//
// args: map of options; "key" is mandatory (asserted below) and is combined with
//   the event id ("<id>.<key>") as the second argument of collectInputOutputPaths.
// Returns the `publishFilesWf` workflow. It takes a channel of tuples whose first
// two elements are [id, state] and emits that same input channel unchanged.
def publishFiles(Map args) {
def key_ = args.get("key")
assert key_ != null : "publishFiles: key must be specified"
workflow publishFilesWf {
take: input_ch
main:
input_ch
| map { tup ->
def id_ = tup[0]
def state_ = tup[1]
// the input files and the target output filenames
// NOTE(review): collectInputOutputPaths is defined elsewhere; presumably it returns
// [inputPath, outputFilename] pairs, which transpose() turns into two parallel lists - confirm
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
[id_, inputFiles_, outputFilenames_]
}
// hand [id, inputFiles, outputFilenames] to the copy process
| publishFilesProc
// the original events are emitted, not the output of publishFilesProc
emit: input_ch
}
return publishFilesWf
}
// Process that copies its input files to the requested target filenames and
// publishes the results under the directory returned by getPublishDir()
// (publishDir mode "copy").
//
// Input:  tuple of id, the files to publish (inputFiles) and the target
//         filenames (outputFiles); the two lists are paired up by position.
// Output: tuple of id and the paths named in outputFiles.
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
// NOTE(review): the "_inputfile?/*" stageAs pattern presumably stages each input file
// in its own numbered directory so that equal basenames cannot clash - confirm
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Wrap single values into lists, pair inputs with outputs by position, and
// generate a "create parent dir if missing" + "cp -r" command for every pair.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// this assumes that the state contains no other values other than those specified in the config
// Build a sub-workflow that copies the output files of a component to the
// filenames requested in the original state, driven by the component config.
//
// args: map of options
//   - "config": component config (mandatory); its allArguments entries with
//     direction "output" determine which state entries are inspected.
//   - "key": optional, defaults to config.name; substituted for '$key' and
//     '${key}' in the filename templates.
// Returns the `publishFilesSimpleWf` workflow. It takes a channel of tuples
// whose first three elements are [id, state, origState] and emits that same
// input channel unchanged.
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of maps with the entries
          // - inputPath: a List of the source files
          // - outputFilename: a List[String] of the target filenames
          // (inputPath, outputFilename) are paired by position and describe
          // the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's either an
                // optional argument for which the component did not
                // generate any output, OR multiple channels were emitted
                // and this output was not part of the event from the
                // channel that is now being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, there is nothing
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  // turn the list of per-file maps into one map of parallel lists
                  def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
                    [key, outputPerFile.collect{dic -> dic[key]}]
                  }
                  return [[key: plainName_] + transposedOutputs]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          // flatten the per-argument lists into the two lists the process expects
          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    // the original events are emitted, not the output of publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1749,33 +1906,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2559,6 +2692,7 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted using groupTuple's 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2755,14 +2988,14 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
@@ -2770,8 +3003,6 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2929,6 +3160,10 @@ meta = [
],
"description" : "Publish the processed results of the run",
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3043,10 +3278,10 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/io/publish",
"viash_version" : "0.9.0",
"git_commit" : "0c27ec143671b3d9c985f2b5dc92f2a2065349d4",
"viash_version" : "0.9.3",
"git_commit" : "162497ab73faf321d5166fe34cd1f6976b14dcb0",
"git_remote" : "https://github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-21-g0c27ec1"
"git_tag" : "v0.1.1-22-g162497a"
},
"package_config" : {
"name" : "demultiplex",
@@ -3060,7 +3295,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.3",
"source" : "src",
"target" : "target",
"config_mods" : [
@@ -3491,7 +3726,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
// create process from temp file
def binding = new nextflow.script.ScriptBinding([:])
def session = nextflow.Nextflow.getSession()
def parser = new nextflow.script.ScriptParser(session)
def parser = _getScriptLoader(session)
.setModule(true)
.setBinding(binding)
def moduleScript = parser.runScript(tempFile)
@@ -3505,6 +3740,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
return scriptMeta.getProcess(procKey)
}
// use Reflection to get a ScriptParser / ScriptLoader
// <25.02.0-edge: new nextflow.script.ScriptParser(session)
// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session)
def _getScriptLoader(nextflow.Session session) {
  // Step 1: look for the legacy parser class; a missing class is not an error here.
  Class<?> legacyParserClass
  try {
    legacyParserClass = Class.forName('nextflow.script.ScriptParser')
  } catch (ClassNotFoundException e) {
    legacyParserClass = null
  }

  // Step 2: the legacy class exists, so instantiate it through its (Session) constructor.
  if (legacyParserClass != null) {
    return legacyParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session)
  }

  // Step 3: fall back to the static factory method ScriptLoaderFactory.create(session).
  try {
    Class<?> loaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory')
    def factoryMethod = loaderFactoryClass.getDeclaredMethod('create', nextflow.Session)
    // the receiver is null because 'create' is static
    return factoryMethod.invoke(null, session)
  } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) {
    // neither entry point was usable; keep the underlying failure as the cause
    throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2)
  }
}
// defaults
meta["defaults"] = [
// key to be used to trace the process and determine output names

View File

@@ -57,6 +57,9 @@ test_resources:
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -152,10 +155,10 @@ build_info:
engine: "docker|native"
output: "target/nextflow/io/untar"
executable: "target/nextflow/io/untar/main.nf"
viash_version: "0.9.0"
git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4"
viash_version: "0.9.3"
git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-21-g0c27ec1"
git_tag: "v0.1.1-22-g162497a"
package_config:
name: "demultiplex"
version: "main"
@@ -164,7 +167,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/demultiplex/v2/"
dest: "testData"
viash_version: "0.9.0"
viash_version: "0.9.3"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// untar main
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Assert that every required output argument declared in the config has a
// non-null entry in `outputs`. `id` and `key` are only used in the error message.
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  // nothing is validated during a stub run
  if (workflow.stubRun) {
    return
  }

  def requiredOutputArgs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (arg in requiredOutputArgs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Build a sub-workflow that publishes the files referenced by each event's state.
//
// args: map of options; "key" is mandatory (asserted below) and is combined with
//   the event id ("<id>.<key>") as the second argument of collectInputOutputPaths.
// Returns the `publishFilesWf` workflow. It takes a channel of tuples whose first
// two elements are [id, state] and emits that same input channel unchanged.
def publishFiles(Map args) {
def key_ = args.get("key")
assert key_ != null : "publishFiles: key must be specified"
workflow publishFilesWf {
take: input_ch
main:
input_ch
| map { tup ->
def id_ = tup[0]
def state_ = tup[1]
// the input files and the target output filenames
// NOTE(review): collectInputOutputPaths is defined elsewhere; presumably it returns
// [inputPath, outputFilename] pairs, which transpose() turns into two parallel lists - confirm
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
[id_, inputFiles_, outputFilenames_]
}
// hand [id, inputFiles, outputFilenames] to the copy process
| publishFilesProc
// the original events are emitted, not the output of publishFilesProc
emit: input_ch
}
return publishFilesWf
}
// Process that copies its input files to the requested target filenames and
// publishes the results under the directory returned by getPublishDir()
// (publishDir mode "copy").
//
// Input:  tuple of id, the files to publish (inputFiles) and the target
//         filenames (outputFiles); the two lists are paired up by position.
// Output: tuple of id and the paths named in outputFiles.
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
// NOTE(review): the "_inputfile?/*" stageAs pattern presumably stages each input file
// in its own numbered directory so that equal basenames cannot clash - confirm
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Wrap single values into lists, pair inputs with outputs by position, and
// generate a "create parent dir if missing" + "cp -r" command for every pair.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// this assumes that the state contains no other values other than those specified in the config
// Build a sub-workflow that copies the output files of a component to the
// filenames requested in the original state, driven by the component config.
//
// args: map of options
//   - "config": component config (mandatory); its allArguments entries with
//     direction "output" determine which state entries are inspected.
//   - "key": optional, defaults to config.name; substituted for '$key' and
//     '${key}' in the filename templates.
// Returns the `publishFilesSimpleWf` workflow. It takes a channel of tuples
// whose first three elements are [id, state, origState] and emits that same
// input channel unchanged.
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of maps with the entries
          // - inputPath: a List of the source files
          // - outputFilename: a List[String] of the target filenames
          // (inputPath, outputFilename) are paired by position and describe
          // the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's either an
                // optional argument for which the component did not
                // generate any output, OR multiple channels were emitted
                // and this output was not part of the event from the
                // channel that is now being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, there is nothing
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  // turn the list of per-file maps into one map of parallel lists
                  def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
                    [key, outputPerFile.collect{dic -> dic[key]}]
                  }
                  return [[key: plainName_] + transposedOutputs]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          // flatten the per-argument lists into the two lists the process expects
          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    // the original events are emitted, not the output of publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1749,33 +1906,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2559,6 +2692,7 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into a single channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted; groupTuple has a 'sort' argument for this. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2755,14 +2988,14 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
@@ -2770,8 +3003,6 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2882,6 +3113,10 @@ meta = [
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -2996,10 +3231,10 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/io/untar",
"viash_version" : "0.9.0",
"git_commit" : "0c27ec143671b3d9c985f2b5dc92f2a2065349d4",
"viash_version" : "0.9.3",
"git_commit" : "162497ab73faf321d5166fe34cd1f6976b14dcb0",
"git_remote" : "https://github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-21-g0c27ec1"
"git_tag" : "v0.1.1-22-g162497a"
},
"package_config" : {
"name" : "demultiplex",
@@ -3013,7 +3248,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.3",
"source" : "src",
"target" : "target",
"config_mods" : [
@@ -3446,7 +3681,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
// create process from temp file
def binding = new nextflow.script.ScriptBinding([:])
def session = nextflow.Nextflow.getSession()
def parser = new nextflow.script.ScriptParser(session)
def parser = _getScriptLoader(session)
.setModule(true)
.setBinding(binding)
def moduleScript = parser.runScript(tempFile)
@@ -3460,6 +3695,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
return scriptMeta.getProcess(procKey)
}
// Obtain a script parser / loader through reflection, so that the same code
// works regardless of which of the two classes the running Nextflow provides:
//   <25.02.0-edge:  new nextflow.script.ScriptParser(session)
//   >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session)
def _getScriptLoader(nextflow.Session session) {
  // first attempt: construct the ScriptParser directly
  try {
    def legacyConstructor = Class
      .forName('nextflow.script.ScriptParser')
      .getDeclaredConstructor(nextflow.Session)
    return legacyConstructor.newInstance(session)
  } catch (ClassNotFoundException legacyMissing) {
    // ScriptParser is not available, continue with the factory below
  }

  // second attempt: ask the ScriptLoaderFactory for a loader
  try {
    def factoryCreate = Class
      .forName('nextflow.script.ScriptLoaderFactory')
      .getDeclaredMethod('create', nextflow.Session)
    // 'create' is static, hence no receiver object
    return factoryCreate.invoke(null, session)
  } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException loaderFailure) {
    // neither approach worked; report it with the underlying cause attached
    throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", loaderFailure)
  }
}
// defaults
meta["defaults"] = [
// key to be used to trace the process and determine output names

View File

@@ -103,6 +103,9 @@ resources:
description: "Runner for demultiplexing of raw sequencing data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -191,10 +194,10 @@ build_info:
engine: "native|native"
output: "target/nextflow/runner"
executable: "target/nextflow/runner/main.nf"
viash_version: "0.9.0"
git_commit: "0c27ec143671b3d9c985f2b5dc92f2a2065349d4"
viash_version: "0.9.3"
git_commit: "162497ab73faf321d5166fe34cd1f6976b14dcb0"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-21-g0c27ec1"
git_tag: "v0.1.1-22-g162497a"
dependencies:
- "target/nextflow/demultiplex"
- "target/nextflow/io/publish"
@@ -206,7 +209,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/demultiplex/v2/"
dest: "testData"
viash_version: "0.9.0"
viash_version: "0.9.3"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// runner main
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.3 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Assert that every required output argument declared in the config
// has a non-null entry in `outputs`. Nothing is checked during a stub run.
//   outputs: map from argument name to emitted value
//   config:  component config; its `allArguments` list is inspected
//   id, key: only used to build the error message
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (workflow.stubRun) {
    return
  }
  def requiredOutputArgs = config.allArguments.findAll { candidate ->
    candidate.direction == "output" && candidate.required
  }
  for (arg in requiredOutputArgs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Create a workflow that publishes all files found in the state of each event.
//
// Arguments (passed as a Map):
//   - key: must not be null; combined with the event id as "<id>.<key>" and
//     passed to collectInputOutputPaths.
//
// The returned workflow expects events whose first two elements are [id, state]
// and hands [id, inputFiles, outputFilenames] to publishFilesProc.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1]

          // the input files and the target output filenames
          // NOTE(review): collectInputOutputPaths is assumed to return a list of
          // [inputFile, outputFilename] pairs -- confirm against its definition.
          def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
          // transposing an empty list yields an empty list, in which case indexing
          // returns null; fall back to empty lists so the process receives
          // "no files" instead of a null path
          def inputFiles_ = inputoutputFilenames_[0] ?: []
          def outputFilenames_ = inputoutputFilenames_[1] ?: []

          [id_, inputFiles_, outputFilenames_]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Process that copies a set of staged input files to their target filenames and
// declares those targets as outputs, so that the publishDir directive below
// copies them into the directory returned by getPublishDir().
//
// Input tuple:  [id, inputFiles, outputFiles]
//   - inputFiles:  file(s) to publish, staged under "_inputfile?/*"
//   - outputFiles: target filename(s), paired with inputFiles by position
// Output tuple: [id, outputFiles as paths]
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Build the shell commands: both inputs are normalised to lists, paired up
// by position, and every pair whose source and target differ contributes a
// "create parent directory if missing" command followed by a recursive copy.
// NOTE(review): the paths are embedded between single quotes, so a filename
// that itself contains a single quote would break the generated command.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
// the script itself: a log line followed by the generated commands, one per line
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// Create a workflow that publishes the output files of a component according to its config.
//
// Arguments (passed as a Map):
//   - config: the component config; must not be null. Only the output arguments listed in
//     `config.allArguments` are considered, so any other entries in the state are ignored.
//   - key: value substituted for '$key' / '${key}' in filename templates; defaults to `config.name`.
//
// The returned workflow expects events of the form [id, state, origState], where
//   - state holds the produced values,    e.g. [output: new File("myoutput.h5ad"), k: 10]
//   - origState holds filename templates, e.g. [output: '$id.$key.foo.h5ad']
// and hands [id, inputPaths, outputFilenames] to publishFilesProc.
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of [inputPath, outputFilename] maps
          // (one per published output argument), where
          //   - inputPath is a List of the files that were produced
          //   - outputFilename is a List[String] of the names they should be copied to
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it is either an optional
                // argument for which the component did not generate any output, or
                // the component emitted multiple channels and this output was not
                // part of the channel that is currently being processed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // values that are not files are stored in the state as-is;
                // there is nothing to copy for them
                if (par.type != "file") {
                  return []
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, the template may be wrapped in a list
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template. Literal replacement is used on purpose:
                // with replaceAll() a '$' or '\' inside the id or key would be
                // interpreted as a group reference / escape in the replacement string.
                def filename = filenameTemplate
                  .replace('$id', id_)
                  .replace('${id}', id_)
                  .replace('$key', key_)
                  .replace('${key}', key_)

                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  // a single value is treated as a list of one, so that it is not
                  // iterated element-wise (a Path is itself an Iterable of name parts)
                  def values = value instanceof Collection ? value : [value]
                  def outputPerFile = values.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  return [[
                    inputPath: outputPerFile*.inputPath,
                    outputFilename: outputPerFile*.outputFilename
                  ]]
                }

                def inputPath = value instanceof File ? value.toPath() : value
                return [[inputPath: [inputPath], outputFilename: [filename]]]
              }

          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1749,33 +1906,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2559,6 +2692,7 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into a single channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted; groupTuple has a 'sort' argument for this. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that are not the 'id'). The comparator
// closure that is used below expects the input to be a List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2755,14 +2988,14 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
@@ -2770,8 +3003,6 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2929,6 +3160,10 @@ meta = [
],
"description" : "Runner for demultiplexing of raw sequencing data",
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3039,10 +3274,10 @@ meta = [
"runner" : "nextflow",
"engine" : "native|native",
"output" : "target/nextflow/runner",
"viash_version" : "0.9.0",
"git_commit" : "0c27ec143671b3d9c985f2b5dc92f2a2065349d4",
"viash_version" : "0.9.3",
"git_commit" : "162497ab73faf321d5166fe34cd1f6976b14dcb0",
"git_remote" : "https://github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-21-g0c27ec1"
"git_tag" : "v0.1.1-22-g162497a"
},
"package_config" : {
"name" : "demultiplex",
@@ -3056,7 +3291,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.3",
"source" : "src",
"target" : "target",
"config_mods" : [