Build branch main with version main (795abd6)
Build pipeline: viash-hub.demultiplex.main-4xxbp
Source commit: 795abd6868
Source message: Run Falco in parallel for each well (#33)
This commit is contained in:
@@ -1,5 +1,12 @@
|
||||
# demultiplex v0.3.5
|
||||
|
||||
## Breaking changes
|
||||
|
||||
* The `demultiplex` workflow now outputs a list of directories
|
||||
for the `output_falco` argument (one for each barcode) instead of one directory
|
||||
for the complete run. The output from the `runner` workflow remained
|
||||
unchanged (PR #33).
|
||||
|
||||
## Minor updates
|
||||
|
||||
* In case Illumina data is detected in the input folder, check for the presence of the 'copyComplete.txt' file.
|
||||
|
||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 Data Intuitive
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
100
README.md
Normal file
100
README.md
Normal file
@@ -0,0 +1,100 @@
|
||||
# Demultiplex.vsh
|
||||
|
||||
Demultiplex.vsh is a workflow for demultiplexing of raw sequencing data. Currently data from Illumina and Element Biosciences sequencers are supported.
|
||||
|
||||
[](https://web.viash-hub.com/packages/demultiplex)
|
||||
[](https://github.com/viash-hub/demultiplex)
|
||||
[](https://github.com/viash-hub/demultiplex/blob/main/LICENSE)
|
||||
[](https://github.com/viash-hub/demultiplex/issues)
|
||||
[](https://viash.io)
|
||||
|
||||
## Workflow Overview
|
||||
The workflow executes the following steps:
|
||||
1. Unpacking the input data (when a TAR archive is provided)
|
||||
2. Run `bclconvert` or `bases2fastq`
|
||||
3. Run `falco` and convert Illumina InterOp information to csv
|
||||
4. Run `multiqc` to generate a report
|
||||
|
||||
## Usage
|
||||
|
||||
Two variants of the same workflow are provided, depending on the flexibility in the output structure required:
|
||||
|
||||
* The `runner` workflow provides a predefined output structure. It requires only a minimal number of parameters to be provided, at the cost of being less flexible. It is located at `target/nextflow/runner/main.nf`.
|
||||
* The `demultiplex` workflow (`target/nextflow/demultiplex/main.nf`) allows for more fine-grained tuning, but requires more parameters to be provided.
|
||||
|
||||
### Test data
|
||||
|
||||
We have provided test data at `gs://viash-hub-test-data/demultiplex/v3/demultiplex_htrnaseq_meta/SingleCell-RNA_P3_2`, but please feel free to bring your own. The URL of the test data can be provided as-is to the workflow, or you can download everything and specify a local path.
|
||||
|
||||
### Setup
|
||||
|
||||
In order to use the workflows in this package, you'll need to do the following:
|
||||
* Install [nextflow](https://www.nextflow.io/docs/latest/install.html)
|
||||
* Install a nextflow compatible executor. This workflow provides a profile for [docker](https://docs.docker.com/get-started/).
|
||||
|
||||
### Setting up SCM
|
||||
|
||||
In order to let nextflow use the viash-hub workflows, you need to set up an [SCM](https://www.nextflow.io/docs/latest/git.html#git-configuration) file. This can be done once by creating `$HOME/.nextflow/scm` and adding the following:
|
||||
```
|
||||
providers {
|
||||
vsh {
|
||||
platform = 'gitlab'
|
||||
server = "packages.viash-hub.com"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Alternatively, a custom location for the SCM file can be specified using the `NXF_SCM_FILE` environment variable.
|
||||
|
||||
You can check if everything is working by getting the `--help` for a workflow:
|
||||
```bash
|
||||
nextflow run \
|
||||
vsh/demultiplex \
|
||||
-r v0.3.4 \
|
||||
--help
|
||||
```
|
||||
|
||||
### (Optional) Resource usage tuning
|
||||
|
||||
Nextflow's labels can be used to specify the amount of resources a process can use. This workflow uses the following labels for CPU and memory:
|
||||
* `verylowmem`, `lowmem`, `midmem`, `highmem`
|
||||
* `verylowcpu`, `lowcpu`, `midcpu`, `highcpu`
|
||||
|
||||
The defaults for these labels can be found at `src/config/labels.config`. Nextflow checks that the specified resources for a process do not exceed what is available on the machine and will not start if it does. Create your own config file to tune the labels to your needs, for example:
|
||||
|
||||
```
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 16 }
|
||||
|
||||
withLabel: verylowmem { memory = 4.GB }
|
||||
withLabel: lowmem { memory = 8.GB }
|
||||
withLabel: midmem { memory = 8.GB }
|
||||
withLabel: highmem { memory = 8.GB }
|
||||
```
|
||||
|
||||
When starting nextflow using the CLI, you can use `-c` to provide the file to nextflow and overwrite the defaults.
|
||||
|
||||
### Example
|
||||
|
||||
```bash
|
||||
nextflow run vsh/demultiplex \
|
||||
-r v0.3.4 \
|
||||
-main-script target/nextflow/runner/main.nf \
|
||||
--input "gs://viash-hub-test-data/demultiplex/v3/demultiplex_htrnaseq_meta/SingleCell-RNA_P3_2" \
|
||||
--demultiplexer bclconvert \
|
||||
--publish_dir example_output/ \
|
||||
-profile docker \
|
||||
-c labels.config
|
||||
```
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
Developed in collaboration with Data Intuitive and Open Analytics.
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ info:
|
||||
- path: gs://viash-hub-test-data/demultiplex/v2/
|
||||
dest: testData
|
||||
|
||||
viash_version: 0.9.1
|
||||
viash_version: 0.9.0
|
||||
|
||||
config_mods: |
|
||||
.requirements.commands := ['ps']
|
||||
|
||||
@@ -16,6 +16,9 @@ argument_groups:
|
||||
type: file
|
||||
required: false
|
||||
multiple: true
|
||||
- name: "--falco_dir"
|
||||
type: file
|
||||
required: true
|
||||
- name: Output arguments
|
||||
arguments:
|
||||
- name: --output_forward
|
||||
@@ -28,6 +31,11 @@ argument_groups:
|
||||
direction: output
|
||||
multiple: true
|
||||
required: false
|
||||
- name: "--output_falco"
|
||||
type: file
|
||||
direction: output
|
||||
required: true
|
||||
multiple: true
|
||||
resources:
|
||||
- type: nextflow_script
|
||||
path: main.nf
|
||||
|
||||
@@ -13,10 +13,12 @@ workflow run_wf {
|
||||
// Gather the following state for all samples
|
||||
def forward_fastqs = states.collect{it.forward_input}.flatten()
|
||||
def reverse_fastqs = states.collect{it.reverse_input}.findAll{it != null}.flatten()
|
||||
def falco_dirs = states.collect{it.falco_dir}
|
||||
|
||||
def resultState = [
|
||||
"output_forward": forward_fastqs,
|
||||
"output_reverse": reverse_fastqs,
|
||||
"output_falco": falco_dirs,
|
||||
// The join ID is the same across all samples from the same run
|
||||
"_meta": ["join_id": states[0]._meta.join_id]
|
||||
]
|
||||
|
||||
@@ -41,6 +41,7 @@ argument_groups:
|
||||
type: file
|
||||
direction: output
|
||||
required: false
|
||||
multiple: true
|
||||
default: "$id/qc/fastqc"
|
||||
- name: "--output_multiqc"
|
||||
description: Directory to write falco output to
|
||||
|
||||
@@ -173,28 +173,12 @@ workflow run_wf {
|
||||
)
|
||||
|
||||
output_ch = samples_ch
|
||||
|
||||
|
||||
| combine_samples.run(
|
||||
fromState: { id, state ->
|
||||
[
|
||||
"id": state.run_id,
|
||||
"forward_input": state.fastq_forward,
|
||||
"reverse_input": state.fastq_reverse,
|
||||
]
|
||||
},
|
||||
toState: [
|
||||
"forward_fastqs": "output_forward",
|
||||
"reverse_fastqs": "output_reverse",
|
||||
]
|
||||
)
|
||||
| falco.run(
|
||||
directives: [label: ["lowcpu", "lowmem"]],
|
||||
directives: [label: ["verylowcpu", "lowmem"]],
|
||||
fromState: {id, state ->
|
||||
reverse_fastqs_list = state.reverse_fastqs ? state.reverse_fastqs : []
|
||||
[
|
||||
"input": state.forward_fastqs + reverse_fastqs_list,
|
||||
"outdir": "${state.output_falco}",
|
||||
"input": [state.fastq_forward, state.fastq_reverse],
|
||||
"outdir": "$id/qc/falco",
|
||||
"summary_filename": null,
|
||||
"report_filename": null,
|
||||
"data_filename": null,
|
||||
@@ -204,11 +188,28 @@ workflow run_wf {
|
||||
state + [ "output_falco" : result.outdir ]
|
||||
}
|
||||
)
|
||||
|
||||
| combine_samples.run(
|
||||
fromState: { id, state ->
|
||||
[
|
||||
"id": state.run_id,
|
||||
"forward_input": state.fastq_forward,
|
||||
"reverse_input": state.fastq_reverse,
|
||||
"falco_dir": state.output_falco,
|
||||
]
|
||||
},
|
||||
toState: [
|
||||
"forward_fastqs": "output_forward",
|
||||
"reverse_fastqs": "output_reverse",
|
||||
"output_falco": "output_falco",
|
||||
]
|
||||
)
|
||||
|
||||
| multiqc.run(
|
||||
directives: [label: ["midcpu", "midmem"]],
|
||||
fromState: {id, state ->
|
||||
def new_state = [
|
||||
"input": [state.output_falco],
|
||||
"input": state.output_falco,
|
||||
"output_report": state.output_multiqc,
|
||||
"cl_config": 'sp: {fastqc/data: {fn: "*_fastqc_data.txt"}}'
|
||||
]
|
||||
|
||||
@@ -25,7 +25,9 @@ workflow test_illumina {
|
||||
}
|
||||
| map {id, state ->
|
||||
assert state.output.isDirectory(): "Expected bclconvert output to be a directory"
|
||||
assert state.output_falco.isDirectory(): "Expected falco output to be a directory"
|
||||
state.output_falco.each{
|
||||
assert it.isDirectory(): "Expected falco output to be a directory"
|
||||
}
|
||||
assert state.output_multiqc.isFile(): "Expected multiQC output to be a file"
|
||||
fastq_files = state.output.listFiles().collect{it.name}
|
||||
assert ["Undetermined_S0_L001_R1_001.fastq.gz", "Sample23_S3_L001_R1_001.fastq.gz",
|
||||
@@ -76,7 +78,7 @@ workflow test_bases2fastq {
|
||||
}
|
||||
| map {id, state ->
|
||||
assert state.output.isDirectory(): "Expected bases2fastq output to be a directory"
|
||||
assert state.output_falco.isDirectory(): "Expected falco output to be a directory"
|
||||
state.output_falco.each{assert it.isDirectory(): "Expected falco output to be a directory"}
|
||||
assert state.output_multiqc.isFile(): "Expected multiQC output to be a file"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
set -eo pipefail
|
||||
|
||||
declare -A input_output_mapping=(["par_input"]="par_output"
|
||||
["par_input_falco"]="par_output_falco"
|
||||
["par_input_multiqc"]="par_output_multiqc"
|
||||
["par_input_run_information"]="par_output_run_information"
|
||||
)
|
||||
@@ -23,4 +22,12 @@ do
|
||||
|
||||
echo "Output files for $output_location:"
|
||||
ls "$output_location"
|
||||
done
|
||||
|
||||
echo "Grouping output from $par_input_falco into $par_output_falco"
|
||||
mkdir -p "$par_output_falco"
|
||||
IFS=";" read -ra falco_inputs <<< $par_input_falco
|
||||
for falco_dir in "${falco_inputs[@]}"; do
|
||||
echo "Copying contents of $falco_dir"
|
||||
find -H -D exec "$falco_dir" -type f -maxdepth 1 -exec cp -t "$par_output_falco" {} +
|
||||
done
|
||||
@@ -12,6 +12,7 @@ argument_groups:
|
||||
description: Directory to write falco output to
|
||||
type: file
|
||||
required: true
|
||||
multiple: true
|
||||
- name: "--input_multiqc"
|
||||
description: Location where to write the MultiQC report to.
|
||||
type: file
|
||||
|
||||
@@ -25,9 +25,9 @@ workflow run_wf {
|
||||
"run_information": state.run_information,
|
||||
"demultiplexer": state.demultiplexer,
|
||||
"skip_copycomplete_check": state.skip_copycomplete_check,
|
||||
"output": "fastq",
|
||||
"output_falco": "qc/fastqc",
|
||||
"output_multiqc": "qc/multiqc_report.html",
|
||||
"output": "$id/fastq",
|
||||
"output_falco": "$id/qc/fastqc",
|
||||
"output_multiqc": "$id/qc/multiqc_report.html",
|
||||
]
|
||||
if (state.run_information) {
|
||||
state_to_pass += ["output_run_information": state.run_information.getName()]
|
||||
|
||||
@@ -43,9 +43,6 @@ resources:
|
||||
dest: "nextflow_labels.config"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -148,10 +145,10 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/io/interop_summary_to_csv"
|
||||
executable: "target/executable/io/interop_summary_to_csv/interop_summary_to_csv"
|
||||
viash_version: "0.9.1"
|
||||
git_commit: "dd1f93487f4e908999504e1fcdf97f6c59f743d9"
|
||||
git_remote: "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-16-gdd1f934"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "795abd68688f4f31b0587bc8e4a7de49b6c00825"
|
||||
git_remote: "https://github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-18-g795abd6"
|
||||
package_config:
|
||||
name: "demultiplex"
|
||||
version: "main"
|
||||
@@ -160,7 +157,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/demultiplex/v2/"
|
||||
dest: "testData"
|
||||
viash_version: "0.9.1"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# interop_summary_to_csv main
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.1 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -169,6 +169,22 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "interop_summary_to_csv main"
|
||||
echo ""
|
||||
echo "Input arguments:"
|
||||
echo " --input"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Sequencing run folder (*not* InterOp folder)."
|
||||
echo ""
|
||||
echo "Output arguments:"
|
||||
echo " --output_run_summary"
|
||||
echo " type: file, required parameter, output, file must exist"
|
||||
echo ""
|
||||
echo " --output_index_summary"
|
||||
echo " type: file, required parameter, output, file must exist"
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -454,9 +470,9 @@ tar -C /tmp/ --no-same-owner --no-same-permissions -xvf /tmp/interop.tar.gz && \
|
||||
mv /tmp/interop-1.3.1-Linux-GNU/bin/index-summary /tmp/interop-1.3.1-Linux-GNU/bin/summary /usr/local/bin/
|
||||
|
||||
LABEL org.opencontainers.image.description="Companion container for running component io interop_summary_to_csv"
|
||||
LABEL org.opencontainers.image.created="2025-01-14T11:56:35Z"
|
||||
LABEL org.opencontainers.image.created="2025-03-04T05:45:47Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex"
|
||||
LABEL org.opencontainers.image.revision="dd1f93487f4e908999504e1fcdf97f6c59f743d9"
|
||||
LABEL org.opencontainers.image.revision="795abd68688f4f31b0587bc8e4a7de49b6c00825"
|
||||
LABEL org.opencontainers.image.version="main"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -571,48 +587,6 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "interop_summary_to_csv main"
|
||||
echo ""
|
||||
echo "Input arguments:"
|
||||
echo " --input"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Sequencing run folder (*not* InterOp folder)."
|
||||
echo ""
|
||||
echo "Output arguments:"
|
||||
echo " --output_run_summary"
|
||||
echo " type: file, required parameter, output, file must exist"
|
||||
echo ""
|
||||
echo " --output_index_summary"
|
||||
echo " type: file, required parameter, output, file must exist"
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ argument_groups:
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--input_multiqc"
|
||||
@@ -100,9 +100,6 @@ resources:
|
||||
description: "Publish the processed results of the run"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -198,10 +195,10 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/io/publish"
|
||||
executable: "target/executable/io/publish/publish"
|
||||
viash_version: "0.9.1"
|
||||
git_commit: "dd1f93487f4e908999504e1fcdf97f6c59f743d9"
|
||||
git_remote: "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-16-gdd1f934"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "795abd68688f4f31b0587bc8e4a7de49b6c00825"
|
||||
git_remote: "https://github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-18-g795abd6"
|
||||
package_config:
|
||||
name: "demultiplex"
|
||||
version: "main"
|
||||
@@ -210,7 +207,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/demultiplex/v2/"
|
||||
dest: "testData"
|
||||
viash_version: "0.9.1"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# publish main
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.1 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -169,6 +169,46 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "publish main"
|
||||
echo ""
|
||||
echo "Publish the processed results of the run"
|
||||
echo ""
|
||||
echo "Input arguments:"
|
||||
echo " --input"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Directory to write fastq data to"
|
||||
echo ""
|
||||
echo " --input_falco"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Directory to write falco output to"
|
||||
echo ""
|
||||
echo " --input_multiqc"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Location where to write the MultiQC report to."
|
||||
echo ""
|
||||
echo " --input_run_information"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Location where to write the run information to."
|
||||
echo ""
|
||||
echo "Output arguments:"
|
||||
echo " --output"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: fastq"
|
||||
echo ""
|
||||
echo " --output_falco"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: qc/fastqc"
|
||||
echo ""
|
||||
echo " --output_multiqc"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: qc/multiqc_report.html"
|
||||
echo ""
|
||||
echo " --output_run_information"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: run_information.csv"
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -450,9 +490,9 @@ RUN apt-get update && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
LABEL org.opencontainers.image.description="Companion container for running component io publish"
|
||||
LABEL org.opencontainers.image.created="2025-01-14T11:56:35Z"
|
||||
LABEL org.opencontainers.image.created="2025-03-04T05:45:47Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex"
|
||||
LABEL org.opencontainers.image.revision="dd1f93487f4e908999504e1fcdf97f6c59f743d9"
|
||||
LABEL org.opencontainers.image.revision="795abd68688f4f31b0587bc8e4a7de49b6c00825"
|
||||
LABEL org.opencontainers.image.version="main"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -567,72 +607,6 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "publish main"
|
||||
echo ""
|
||||
echo "Publish the processed results of the run"
|
||||
echo ""
|
||||
echo "Input arguments:"
|
||||
echo " --input"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Directory to write fastq data to"
|
||||
echo ""
|
||||
echo " --input_falco"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Directory to write falco output to"
|
||||
echo ""
|
||||
echo " --input_multiqc"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Location where to write the MultiQC report to."
|
||||
echo ""
|
||||
echo " --input_run_information"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Location where to write the run information to."
|
||||
echo ""
|
||||
echo "Output arguments:"
|
||||
echo " --output"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: fastq"
|
||||
echo ""
|
||||
echo " --output_falco"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: qc/fastqc"
|
||||
echo ""
|
||||
echo " --output_multiqc"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: qc/multiqc_report.html"
|
||||
echo ""
|
||||
echo " --output_run_information"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: run_information.csv"
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
@@ -670,14 +644,20 @@ while [[ $# -gt 0 ]]; do
|
||||
shift 1
|
||||
;;
|
||||
--input_falco)
|
||||
[ -n "$VIASH_PAR_INPUT_FALCO" ] && ViashError Bad arguments for option \'--input_falco\': \'$VIASH_PAR_INPUT_FALCO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
|
||||
VIASH_PAR_INPUT_FALCO="$2"
|
||||
if [ -z "$VIASH_PAR_INPUT_FALCO" ]; then
|
||||
VIASH_PAR_INPUT_FALCO="$2"
|
||||
else
|
||||
VIASH_PAR_INPUT_FALCO="$VIASH_PAR_INPUT_FALCO;""$2"
|
||||
fi
|
||||
[ $# -lt 2 ] && ViashError Not enough arguments passed to --input_falco. Use "--help" to get more information on the parameters. && exit 1
|
||||
shift 2
|
||||
;;
|
||||
--input_falco=*)
|
||||
[ -n "$VIASH_PAR_INPUT_FALCO" ] && ViashError Bad arguments for option \'--input_falco=*\': \'$VIASH_PAR_INPUT_FALCO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
|
||||
VIASH_PAR_INPUT_FALCO=$(ViashRemoveFlags "$1")
|
||||
if [ -z "$VIASH_PAR_INPUT_FALCO" ]; then
|
||||
VIASH_PAR_INPUT_FALCO=$(ViashRemoveFlags "$1")
|
||||
else
|
||||
VIASH_PAR_INPUT_FALCO="$VIASH_PAR_INPUT_FALCO;"$(ViashRemoveFlags "$1")
|
||||
fi
|
||||
shift 1
|
||||
;;
|
||||
--input_multiqc)
|
||||
@@ -978,9 +958,17 @@ if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then
|
||||
ViashError "Input file '$VIASH_PAR_INPUT' does not exist."
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -z "$VIASH_PAR_INPUT_FALCO" ] && [ ! -e "$VIASH_PAR_INPUT_FALCO" ]; then
|
||||
ViashError "Input file '$VIASH_PAR_INPUT_FALCO' does not exist."
|
||||
exit 1
|
||||
if [ ! -z "$VIASH_PAR_INPUT_FALCO" ]; then
|
||||
IFS=';'
|
||||
set -f
|
||||
for file in $VIASH_PAR_INPUT_FALCO; do
|
||||
unset IFS
|
||||
if [ ! -e "$file" ]; then
|
||||
ViashError "Input file '$file' does not exist."
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
set +f
|
||||
fi
|
||||
if [ ! -z "$VIASH_PAR_INPUT_MULTIQC" ] && [ ! -e "$VIASH_PAR_INPUT_MULTIQC" ]; then
|
||||
ViashError "Input file '$VIASH_PAR_INPUT_MULTIQC' does not exist."
|
||||
@@ -1096,8 +1084,15 @@ if [ ! -z "$VIASH_PAR_INPUT" ]; then
|
||||
VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT")
|
||||
fi
|
||||
if [ ! -z "$VIASH_PAR_INPUT_FALCO" ]; then
|
||||
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT_FALCO")" )
|
||||
VIASH_PAR_INPUT_FALCO=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT_FALCO")
|
||||
VIASH_TEST_INPUT_FALCO=()
|
||||
IFS=';'
|
||||
for var in $VIASH_PAR_INPUT_FALCO; do
|
||||
unset IFS
|
||||
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" )
|
||||
var=$(ViashDockerAutodetectMount "$var")
|
||||
VIASH_TEST_INPUT_FALCO+=( "$var" )
|
||||
done
|
||||
VIASH_PAR_INPUT_FALCO=$(IFS=';' ; echo "${VIASH_TEST_INPUT_FALCO[*]}")
|
||||
fi
|
||||
if [ ! -z "$VIASH_PAR_INPUT_MULTIQC" ]; then
|
||||
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT_MULTIQC")" )
|
||||
@@ -1229,7 +1224,6 @@ $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}"
|
||||
set -eo pipefail
|
||||
|
||||
declare -A input_output_mapping=(["par_input"]="par_output"
|
||||
["par_input_falco"]="par_output_falco"
|
||||
["par_input_multiqc"]="par_output_multiqc"
|
||||
["par_input_run_information"]="par_output_run_information"
|
||||
)
|
||||
@@ -1250,6 +1244,14 @@ do
|
||||
echo "Output files for \$output_location:"
|
||||
ls "\$output_location"
|
||||
done
|
||||
|
||||
echo "Grouping output from \$par_input_falco into \$par_output_falco"
|
||||
mkdir -p "\$par_output_falco"
|
||||
IFS=";" read -ra falco_inputs <<< \$par_input_falco
|
||||
for falco_dir in "\${falco_inputs[@]}"; do
|
||||
echo "Copying contents of \$falco_dir"
|
||||
find -H -D exec "\$falco_dir" -type f -maxdepth 1 -exec cp -t "\$par_output_falco" {} +
|
||||
done
|
||||
VIASHMAIN
|
||||
bash "\$tempscript" &
|
||||
wait "\$!"
|
||||
@@ -1264,7 +1266,17 @@ if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then
|
||||
VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT")
|
||||
fi
|
||||
if [ ! -z "$VIASH_PAR_INPUT_FALCO" ]; then
|
||||
VIASH_PAR_INPUT_FALCO=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT_FALCO")
|
||||
unset VIASH_TEST_INPUT_FALCO
|
||||
IFS=';'
|
||||
for var in $VIASH_PAR_INPUT_FALCO; do
|
||||
unset IFS
|
||||
if [ -z "$VIASH_TEST_INPUT_FALCO" ]; then
|
||||
VIASH_TEST_INPUT_FALCO="$(ViashDockerStripAutomount "$var")"
|
||||
else
|
||||
VIASH_TEST_INPUT_FALCO="$VIASH_TEST_INPUT_FALCO;""$(ViashDockerStripAutomount "$var")"
|
||||
fi
|
||||
done
|
||||
VIASH_PAR_INPUT_FALCO="$VIASH_TEST_INPUT_FALCO"
|
||||
fi
|
||||
if [ ! -z "$VIASH_PAR_INPUT_MULTIQC" ]; then
|
||||
VIASH_PAR_INPUT_MULTIQC=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT_MULTIQC")
|
||||
|
||||
@@ -57,9 +57,6 @@ test_resources:
|
||||
is_executable: true
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -155,10 +152,10 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/io/untar"
|
||||
executable: "target/executable/io/untar/untar"
|
||||
viash_version: "0.9.1"
|
||||
git_commit: "dd1f93487f4e908999504e1fcdf97f6c59f743d9"
|
||||
git_remote: "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-16-gdd1f934"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "795abd68688f4f31b0587bc8e4a7de49b6c00825"
|
||||
git_remote: "https://github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-18-g795abd6"
|
||||
package_config:
|
||||
name: "demultiplex"
|
||||
version: "main"
|
||||
@@ -167,7 +164,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/demultiplex/v2/"
|
||||
dest: "testData"
|
||||
viash_version: "0.9.1"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# untar main
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.1 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -169,6 +169,32 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "untar main"
|
||||
echo ""
|
||||
echo "Unpack a .tar file. When the contents of the .tar file is just a single"
|
||||
echo "directory,"
|
||||
echo "put the contents of the directory into the output folder instead of that"
|
||||
echo "directory."
|
||||
echo ""
|
||||
echo "Input arguments:"
|
||||
echo " --input"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Tarball file to be unpacked."
|
||||
echo ""
|
||||
echo "Output arguments:"
|
||||
echo " --output"
|
||||
echo " type: file, required parameter, output, file must exist"
|
||||
echo " Directory to write the contents of the .tar file to."
|
||||
echo ""
|
||||
echo "Other arguments:"
|
||||
echo " -e, --exclude"
|
||||
echo " type: string"
|
||||
echo " example: docs/figures"
|
||||
echo " Prevents any file or member whose name matches the shell wildcard"
|
||||
echo " (pattern) from being extracted."
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -450,9 +476,9 @@ RUN apt-get update && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
LABEL org.opencontainers.image.description="Companion container for running component io untar"
|
||||
LABEL org.opencontainers.image.created="2025-01-14T11:56:35Z"
|
||||
LABEL org.opencontainers.image.created="2025-03-04T05:45:47Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex"
|
||||
LABEL org.opencontainers.image.revision="dd1f93487f4e908999504e1fcdf97f6c59f743d9"
|
||||
LABEL org.opencontainers.image.revision="795abd68688f4f31b0587bc8e4a7de49b6c00825"
|
||||
LABEL org.opencontainers.image.version="main"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -567,58 +593,6 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "untar main"
|
||||
echo ""
|
||||
echo "Unpack a .tar file. When the contents of the .tar file is just a single"
|
||||
echo "directory,"
|
||||
echo "put the contents of the directory into the output folder instead of that"
|
||||
echo "directory."
|
||||
echo ""
|
||||
echo "Input arguments:"
|
||||
echo " --input"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Tarball file to be unpacked."
|
||||
echo ""
|
||||
echo "Output arguments:"
|
||||
echo " --output"
|
||||
echo " type: file, required parameter, output, file must exist"
|
||||
echo " Directory to write the contents of the .tar file to."
|
||||
echo ""
|
||||
echo "Other arguments:"
|
||||
echo " -e, --exclude"
|
||||
echo " type: string"
|
||||
echo " example: docs/figures"
|
||||
echo " Prevents any file or member whose name matches the shell wildcard"
|
||||
echo " (pattern) from being extracted."
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
|
||||
@@ -30,6 +30,15 @@ argument_groups:
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--falco_dir"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Output arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
@@ -50,6 +59,15 @@ argument_groups:
|
||||
direction: "output"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output_falco"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "nextflow_script"
|
||||
path: "main.nf"
|
||||
@@ -62,9 +80,6 @@ description: "Combine fastq files from across samples into one event with a list
|
||||
\ fastq files per orientation."
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -146,10 +161,10 @@ build_info:
|
||||
engine: "native|native"
|
||||
output: "target/nextflow/dataflow/combine_samples"
|
||||
executable: "target/nextflow/dataflow/combine_samples/main.nf"
|
||||
viash_version: "0.9.1"
|
||||
git_commit: "dd1f93487f4e908999504e1fcdf97f6c59f743d9"
|
||||
git_remote: "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-16-gdd1f934"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "795abd68688f4f31b0587bc8e4a7de49b6c00825"
|
||||
git_remote: "https://github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-18-g795abd6"
|
||||
package_config:
|
||||
name: "demultiplex"
|
||||
version: "main"
|
||||
@@ -158,7 +173,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/demultiplex/v2/"
|
||||
dest: "testData"
|
||||
viash_version: "0.9.1"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// combine_samples main
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.1 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required && arg.direction == "input") {
|
||||
if (arg.required) {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -192,8 +192,15 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -206,16 +213,6 @@ Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1669,162 +1666,6 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1882,6 +1723,8 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1894,7 +1737,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1906,17 +1749,33 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile]}
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1947,10 +1806,13 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1967,7 +1829,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value]]
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1998,9 +1860,13 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
return value_
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -2008,17 +1874,18 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [["key": plainName_, value: value_]]
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2692,8 +2559,7 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2850,36 +2716,12 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2892,95 +2734,19 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
|
||||
[join_id, channelId, id_, output_]
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
def stateToAddNoMultiple = state_to_add.findAll{k, v -> !multipleArgs.contains(k)}
|
||||
// First add non multiple arguments
|
||||
|
||||
def overlap = old_state.keySet().intersect(stateToAddNoMultiple.keySet())
|
||||
assert overlap.isEmpty() : "ID $id: multiple entries for " +
|
||||
" argument(s) $overlap were emitted."
|
||||
def return_state = old_state + stateToAddNoMultiple
|
||||
|
||||
// Add `multiple: true` arguments
|
||||
def stateToAddMultiple = state_to_add.findAll{k, v -> multipleArgs.contains(k)}
|
||||
stateToAddMultiple.each {k, v ->
|
||||
def currentKey = return_state.getOrDefault(k, [])
|
||||
def currentKeyList = currentKey instanceof List ? currentKey : [currentKey]
|
||||
currentKeyList.add(v)
|
||||
return_state[k] = currentKeyList
|
||||
}
|
||||
return return_state
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2989,21 +2755,23 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishStates = chNewState
|
||||
def chPublish = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -3071,6 +2839,16 @@ meta = [
|
||||
"direction" : "input",
|
||||
"multiple" : true,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "file",
|
||||
"name" : "--falco_dir",
|
||||
"must_exist" : true,
|
||||
"create_parent" : true,
|
||||
"required" : true,
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -3096,6 +2874,16 @@ meta = [
|
||||
"direction" : "output",
|
||||
"multiple" : true,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "file",
|
||||
"name" : "--output_falco",
|
||||
"must_exist" : true,
|
||||
"create_parent" : true,
|
||||
"required" : true,
|
||||
"direction" : "output",
|
||||
"multiple" : true,
|
||||
"multiple_sep" : ";"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -3115,10 +2903,6 @@ meta = [
|
||||
],
|
||||
"description" : "Combine fastq files from across samples into one event with a list of fastq files per orientation.",
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3215,10 +2999,10 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "native|native",
|
||||
"output" : "target/nextflow/dataflow/combine_samples",
|
||||
"viash_version" : "0.9.1",
|
||||
"git_commit" : "dd1f93487f4e908999504e1fcdf97f6c59f743d9",
|
||||
"git_remote" : "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex",
|
||||
"git_tag" : "v0.1.1-16-gdd1f934"
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "795abd68688f4f31b0587bc8e4a7de49b6c00825",
|
||||
"git_remote" : "https://github.com/viash-hub/demultiplex",
|
||||
"git_tag" : "v0.1.1-18-g795abd6"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "demultiplex",
|
||||
@@ -3232,7 +3016,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.1",
|
||||
"viash_version" : "0.9.0",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
@@ -3277,10 +3061,12 @@ workflow run_wf {
|
||||
// Gather the following state for all samples
|
||||
def forward_fastqs = states.collect{it.forward_input}.flatten()
|
||||
def reverse_fastqs = states.collect{it.reverse_input}.findAll{it != null}.flatten()
|
||||
def falco_dirs = states.collect{it.falco_dir}
|
||||
|
||||
def resultState = [
|
||||
"output_forward": forward_fastqs,
|
||||
"output_reverse": reverse_fastqs,
|
||||
"output_falco": falco_dirs,
|
||||
// The join ID is the same across all samples from the same run
|
||||
"_meta": ["join_id": states[0]._meta.join_id]
|
||||
]
|
||||
|
||||
@@ -43,6 +43,16 @@
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"falco_dir": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `file`, required. ",
|
||||
"help_text": "Type: `file`, required. "
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
@@ -75,6 +85,17 @@
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"output_falco": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, required, default: `$id.$key.output_falco_*.output_falco_*`, multiple_sep: `\";\"`. ",
|
||||
"help_text": "Type: List of `file`, required, default: `$id.$key.output_falco_*.output_falco_*`, multiple_sep: `\";\"`. "
|
||||
,
|
||||
"default":"$id.$key.output_falco_*.output_falco_*"
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
@@ -56,9 +56,6 @@ description: "From a directory containing fastq files, gather the files per samp
|
||||
\ \nand validate according to the contents of the sample sheet.\n"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -140,10 +137,10 @@ build_info:
|
||||
engine: "native|native"
|
||||
output: "target/nextflow/dataflow/gather_fastqs_and_validate"
|
||||
executable: "target/nextflow/dataflow/gather_fastqs_and_validate/main.nf"
|
||||
viash_version: "0.9.1"
|
||||
git_commit: "dd1f93487f4e908999504e1fcdf97f6c59f743d9"
|
||||
git_remote: "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-16-gdd1f934"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "795abd68688f4f31b0587bc8e4a7de49b6c00825"
|
||||
git_remote: "https://github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-18-g795abd6"
|
||||
package_config:
|
||||
name: "demultiplex"
|
||||
version: "main"
|
||||
@@ -152,7 +149,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/demultiplex/v2/"
|
||||
dest: "testData"
|
||||
viash_version: "0.9.1"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// gather_fastqs_and_validate main
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.1 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required && arg.direction == "input") {
|
||||
if (arg.required) {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -192,8 +192,15 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -206,16 +213,6 @@ Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1669,162 +1666,6 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1882,6 +1723,8 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1894,7 +1737,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1906,17 +1749,33 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile]}
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1947,10 +1806,13 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1967,7 +1829,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value]]
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1998,9 +1860,13 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
return value_
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -2008,17 +1874,18 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [["key": plainName_, value: value_]]
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2692,8 +2559,7 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2850,36 +2716,12 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2892,95 +2734,19 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
|
||||
[join_id, channelId, id_, output_]
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
def stateToAddNoMultiple = state_to_add.findAll{k, v -> !multipleArgs.contains(k)}
|
||||
// First add non multiple arguments
|
||||
|
||||
def overlap = old_state.keySet().intersect(stateToAddNoMultiple.keySet())
|
||||
assert overlap.isEmpty() : "ID $id: multiple entries for " +
|
||||
" argument(s) $overlap were emitted."
|
||||
def return_state = old_state + stateToAddNoMultiple
|
||||
|
||||
// Add `multiple: true` arguments
|
||||
def stateToAddMultiple = state_to_add.findAll{k, v -> multipleArgs.contains(k)}
|
||||
stateToAddMultiple.each {k, v ->
|
||||
def currentKey = return_state.getOrDefault(k, [])
|
||||
def currentKeyList = currentKey instanceof List ? currentKey : [currentKey]
|
||||
currentKeyList.add(v)
|
||||
return_state[k] = currentKeyList
|
||||
}
|
||||
return return_state
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2989,21 +2755,23 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishStates = chNewState
|
||||
def chPublish = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -3108,10 +2876,6 @@ meta = [
|
||||
],
|
||||
"description" : "From a directory containing fastq files, gather the files per sample \nand validate according to the contents of the sample sheet.\n",
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3208,10 +2972,10 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "native|native",
|
||||
"output" : "target/nextflow/dataflow/gather_fastqs_and_validate",
|
||||
"viash_version" : "0.9.1",
|
||||
"git_commit" : "dd1f93487f4e908999504e1fcdf97f6c59f743d9",
|
||||
"git_remote" : "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex",
|
||||
"git_tag" : "v0.1.1-16-gdd1f934"
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "795abd68688f4f31b0587bc8e4a7de49b6c00825",
|
||||
"git_remote" : "https://github.com/viash-hub/demultiplex",
|
||||
"git_tag" : "v0.1.1-18-g795abd6"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "demultiplex",
|
||||
@@ -3225,7 +2989,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.1",
|
||||
"viash_version" : "0.9.0",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -71,7 +71,7 @@ argument_groups:
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output_multiqc"
|
||||
@@ -124,9 +124,6 @@ test_resources:
|
||||
entrypoint: "test_bases2fastq"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -246,10 +243,10 @@ build_info:
|
||||
engine: "native|native"
|
||||
output: "target/nextflow/demultiplex"
|
||||
executable: "target/nextflow/demultiplex/main.nf"
|
||||
viash_version: "0.9.1"
|
||||
git_commit: "dd1f93487f4e908999504e1fcdf97f6c59f743d9"
|
||||
git_remote: "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-16-gdd1f934"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "795abd68688f4f31b0587bc8e4a7de49b6c00825"
|
||||
git_remote: "https://github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-18-g795abd6"
|
||||
dependencies:
|
||||
- "target/nextflow/io/untar"
|
||||
- "target/nextflow/dataflow/gather_fastqs_and_validate"
|
||||
@@ -267,7 +264,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/demultiplex/v2/"
|
||||
dest: "testData"
|
||||
viash_version: "0.9.1"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// demultiplex main
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.1 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required && arg.direction == "input") {
|
||||
if (arg.required) {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -192,8 +192,15 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -206,16 +213,6 @@ Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1669,162 +1666,6 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1882,6 +1723,8 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1894,7 +1737,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1906,17 +1749,33 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile]}
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1947,10 +1806,13 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1967,7 +1829,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value]]
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1998,9 +1860,13 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
return value_
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -2008,17 +1874,18 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [["key": plainName_, value: value_]]
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2692,8 +2559,7 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2850,36 +2716,12 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2892,95 +2734,19 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
|
||||
[join_id, channelId, id_, output_]
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
def stateToAddNoMultiple = state_to_add.findAll{k, v -> !multipleArgs.contains(k)}
|
||||
// First add non multiple arguments
|
||||
|
||||
def overlap = old_state.keySet().intersect(stateToAddNoMultiple.keySet())
|
||||
assert overlap.isEmpty() : "ID $id: multiple entries for " +
|
||||
" argument(s) $overlap were emitted."
|
||||
def return_state = old_state + stateToAddNoMultiple
|
||||
|
||||
// Add `multiple: true` arguments
|
||||
def stateToAddMultiple = state_to_add.findAll{k, v -> multipleArgs.contains(k)}
|
||||
stateToAddMultiple.each {k, v ->
|
||||
def currentKey = return_state.getOrDefault(k, [])
|
||||
def currentKeyList = currentKey instanceof List ? currentKey : [currentKey]
|
||||
currentKeyList.add(v)
|
||||
return_state[k] = currentKeyList
|
||||
}
|
||||
return return_state
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2989,21 +2755,23 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishStates = chNewState
|
||||
def chPublish = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -3116,7 +2884,7 @@ meta = [
|
||||
"create_parent" : true,
|
||||
"required" : false,
|
||||
"direction" : "output",
|
||||
"multiple" : false,
|
||||
"multiple" : true,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
@@ -3189,10 +2957,6 @@ meta = [
|
||||
}
|
||||
],
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3355,10 +3119,10 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "native|native",
|
||||
"output" : "target/nextflow/demultiplex",
|
||||
"viash_version" : "0.9.1",
|
||||
"git_commit" : "dd1f93487f4e908999504e1fcdf97f6c59f743d9",
|
||||
"git_remote" : "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex",
|
||||
"git_tag" : "v0.1.1-16-gdd1f934"
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "795abd68688f4f31b0587bc8e4a7de49b6c00825",
|
||||
"git_remote" : "https://github.com/viash-hub/demultiplex",
|
||||
"git_tag" : "v0.1.1-18-g795abd6"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "demultiplex",
|
||||
@@ -3372,7 +3136,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.1",
|
||||
"viash_version" : "0.9.0",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
@@ -3585,28 +3349,12 @@ workflow run_wf {
|
||||
)
|
||||
|
||||
output_ch = samples_ch
|
||||
|
||||
|
||||
| combine_samples.run(
|
||||
fromState: { id, state ->
|
||||
[
|
||||
"id": state.run_id,
|
||||
"forward_input": state.fastq_forward,
|
||||
"reverse_input": state.fastq_reverse,
|
||||
]
|
||||
},
|
||||
toState: [
|
||||
"forward_fastqs": "output_forward",
|
||||
"reverse_fastqs": "output_reverse",
|
||||
]
|
||||
)
|
||||
| falco.run(
|
||||
directives: [label: ["lowcpu", "lowmem"]],
|
||||
directives: [label: ["verylowcpu", "lowmem"]],
|
||||
fromState: {id, state ->
|
||||
reverse_fastqs_list = state.reverse_fastqs ? state.reverse_fastqs : []
|
||||
[
|
||||
"input": state.forward_fastqs + reverse_fastqs_list,
|
||||
"outdir": "${state.output_falco}",
|
||||
"input": [state.fastq_forward, state.fastq_reverse],
|
||||
"outdir": "$id/qc/falco",
|
||||
"summary_filename": null,
|
||||
"report_filename": null,
|
||||
"data_filename": null,
|
||||
@@ -3616,11 +3364,28 @@ workflow run_wf {
|
||||
state + [ "output_falco" : result.outdir ]
|
||||
}
|
||||
)
|
||||
|
||||
| combine_samples.run(
|
||||
fromState: { id, state ->
|
||||
[
|
||||
"id": state.run_id,
|
||||
"forward_input": state.fastq_forward,
|
||||
"reverse_input": state.fastq_reverse,
|
||||
"falco_dir": state.output_falco,
|
||||
]
|
||||
},
|
||||
toState: [
|
||||
"forward_fastqs": "output_forward",
|
||||
"reverse_fastqs": "output_reverse",
|
||||
"output_falco": "output_falco",
|
||||
]
|
||||
)
|
||||
|
||||
| multiqc.run(
|
||||
directives: [label: ["midcpu", "midmem"]],
|
||||
fromState: {id, state ->
|
||||
def new_state = [
|
||||
"input": [state.output_falco],
|
||||
"input": state.output_falco,
|
||||
"output_report": state.output_multiqc,
|
||||
"cl_config": 'sp: {fastqc/data: {fn: "*_fastqc_data.txt"}}'
|
||||
]
|
||||
|
||||
@@ -80,10 +80,10 @@
|
||||
"output_falco": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `file`, default: `$id.$key.output_falco.output_falco`. Directory to write falco output to",
|
||||
"help_text": "Type: `file`, default: `$id.$key.output_falco.output_falco`. Directory to write falco output to"
|
||||
"description": "Type: List of `file`, default: `$id.$key.output_falco_*.output_falco_*`, multiple_sep: `\";\"`. Directory to write falco output to",
|
||||
"help_text": "Type: List of `file`, default: `$id.$key.output_falco_*.output_falco_*`, multiple_sep: `\";\"`. Directory to write falco output to"
|
||||
,
|
||||
"default":"$id.$key.output_falco.output_falco"
|
||||
"default":"$id.$key.output_falco_*.output_falco_*"
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -43,9 +43,6 @@ resources:
|
||||
dest: "nextflow_labels.config"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -148,10 +145,10 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/io/interop_summary_to_csv"
|
||||
executable: "target/nextflow/io/interop_summary_to_csv/main.nf"
|
||||
viash_version: "0.9.1"
|
||||
git_commit: "dd1f93487f4e908999504e1fcdf97f6c59f743d9"
|
||||
git_remote: "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-16-gdd1f934"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "795abd68688f4f31b0587bc8e4a7de49b6c00825"
|
||||
git_remote: "https://github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-18-g795abd6"
|
||||
package_config:
|
||||
name: "demultiplex"
|
||||
version: "main"
|
||||
@@ -160,7 +157,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/demultiplex/v2/"
|
||||
dest: "testData"
|
||||
viash_version: "0.9.1"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// interop_summary_to_csv main
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.1 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required && arg.direction == "input") {
|
||||
if (arg.required) {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -192,8 +192,15 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -206,16 +213,6 @@ Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1669,162 +1666,6 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1882,6 +1723,8 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1894,7 +1737,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1906,17 +1749,33 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile]}
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1947,10 +1806,13 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1967,7 +1829,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value]]
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1998,9 +1860,13 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
return value_
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -2008,17 +1874,18 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [["key": plainName_, value: value_]]
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2692,8 +2559,7 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2850,36 +2716,12 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2892,95 +2734,19 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
|
||||
[join_id, channelId, id_, output_]
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
def stateToAddNoMultiple = state_to_add.findAll{k, v -> !multipleArgs.contains(k)}
|
||||
// First add non multiple arguments
|
||||
|
||||
def overlap = old_state.keySet().intersect(stateToAddNoMultiple.keySet())
|
||||
assert overlap.isEmpty() : "ID $id: multiple entries for " +
|
||||
" argument(s) $overlap were emitted."
|
||||
def return_state = old_state + stateToAddNoMultiple
|
||||
|
||||
// Add `multiple: true` arguments
|
||||
def stateToAddMultiple = state_to_add.findAll{k, v -> multipleArgs.contains(k)}
|
||||
stateToAddMultiple.each {k, v ->
|
||||
def currentKey = return_state.getOrDefault(k, [])
|
||||
def currentKeyList = currentKey instanceof List ? currentKey : [currentKey]
|
||||
currentKeyList.add(v)
|
||||
return_state[k] = currentKeyList
|
||||
}
|
||||
return return_state
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2989,21 +2755,23 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishStates = chNewState
|
||||
def chPublish = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -3095,10 +2863,6 @@ meta = [
|
||||
}
|
||||
],
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3220,10 +2984,10 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/io/interop_summary_to_csv",
|
||||
"viash_version" : "0.9.1",
|
||||
"git_commit" : "dd1f93487f4e908999504e1fcdf97f6c59f743d9",
|
||||
"git_remote" : "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex",
|
||||
"git_tag" : "v0.1.1-16-gdd1f934"
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "795abd68688f4f31b0587bc8e4a7de49b6c00825",
|
||||
"git_remote" : "https://github.com/viash-hub/demultiplex",
|
||||
"git_tag" : "v0.1.1-18-g795abd6"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "demultiplex",
|
||||
@@ -3237,7 +3001,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.1",
|
||||
"viash_version" : "0.9.0",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -22,7 +22,7 @@ argument_groups:
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--input_multiqc"
|
||||
@@ -100,9 +100,6 @@ resources:
|
||||
description: "Publish the processed results of the run"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -198,10 +195,10 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/io/publish"
|
||||
executable: "target/nextflow/io/publish/main.nf"
|
||||
viash_version: "0.9.1"
|
||||
git_commit: "dd1f93487f4e908999504e1fcdf97f6c59f743d9"
|
||||
git_remote: "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-16-gdd1f934"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "795abd68688f4f31b0587bc8e4a7de49b6c00825"
|
||||
git_remote: "https://github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-18-g795abd6"
|
||||
package_config:
|
||||
name: "demultiplex"
|
||||
version: "main"
|
||||
@@ -210,7 +207,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/demultiplex/v2/"
|
||||
dest: "testData"
|
||||
viash_version: "0.9.1"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// publish main
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.1 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required && arg.direction == "input") {
|
||||
if (arg.required) {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -192,8 +192,15 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -206,16 +213,6 @@ Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1669,162 +1666,6 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1882,6 +1723,8 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1894,7 +1737,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1906,17 +1749,33 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile]}
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1947,10 +1806,13 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1967,7 +1829,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value]]
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1998,9 +1860,13 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
return value_
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -2008,17 +1874,18 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [["key": plainName_, value: value_]]
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2692,8 +2559,7 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2850,36 +2716,12 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2892,95 +2734,19 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
|
||||
[join_id, channelId, id_, output_]
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
def stateToAddNoMultiple = state_to_add.findAll{k, v -> !multipleArgs.contains(k)}
|
||||
// First add non multiple arguments
|
||||
|
||||
def overlap = old_state.keySet().intersect(stateToAddNoMultiple.keySet())
|
||||
assert overlap.isEmpty() : "ID $id: multiple entries for " +
|
||||
" argument(s) $overlap were emitted."
|
||||
def return_state = old_state + stateToAddNoMultiple
|
||||
|
||||
// Add `multiple: true` arguments
|
||||
def stateToAddMultiple = state_to_add.findAll{k, v -> multipleArgs.contains(k)}
|
||||
stateToAddMultiple.each {k, v ->
|
||||
def currentKey = return_state.getOrDefault(k, [])
|
||||
def currentKeyList = currentKey instanceof List ? currentKey : [currentKey]
|
||||
currentKeyList.add(v)
|
||||
return_state[k] = currentKeyList
|
||||
}
|
||||
return return_state
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2989,21 +2755,23 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishStates = chNewState
|
||||
def chPublish = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -3062,7 +2830,7 @@ meta = [
|
||||
"create_parent" : true,
|
||||
"required" : true,
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple" : true,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
@@ -3161,10 +2929,6 @@ meta = [
|
||||
],
|
||||
"description" : "Publish the processed results of the run",
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3279,10 +3043,10 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/io/publish",
|
||||
"viash_version" : "0.9.1",
|
||||
"git_commit" : "dd1f93487f4e908999504e1fcdf97f6c59f743d9",
|
||||
"git_remote" : "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex",
|
||||
"git_tag" : "v0.1.1-16-gdd1f934"
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "795abd68688f4f31b0587bc8e4a7de49b6c00825",
|
||||
"git_remote" : "https://github.com/viash-hub/demultiplex",
|
||||
"git_tag" : "v0.1.1-18-g795abd6"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "demultiplex",
|
||||
@@ -3296,7 +3060,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.1",
|
||||
"viash_version" : "0.9.0",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
@@ -3365,7 +3129,6 @@ $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}"
|
||||
set -eo pipefail
|
||||
|
||||
declare -A input_output_mapping=(["par_input"]="par_output"
|
||||
["par_input_falco"]="par_output_falco"
|
||||
["par_input_multiqc"]="par_output_multiqc"
|
||||
["par_input_run_information"]="par_output_run_information"
|
||||
)
|
||||
@@ -3386,6 +3149,14 @@ do
|
||||
echo "Output files for \\$output_location:"
|
||||
ls "\\$output_location"
|
||||
done
|
||||
|
||||
echo "Grouping output from \\$par_input_falco into \\$par_output_falco"
|
||||
mkdir -p "\\$par_output_falco"
|
||||
IFS=";" read -ra falco_inputs <<< \\$par_input_falco
|
||||
for falco_dir in "\\${falco_inputs[@]}"; do
|
||||
echo "Copying contents of \\$falco_dir"
|
||||
find -H -D exec "\\$falco_dir" -type f -maxdepth 1 -exec cp -t "\\$par_output_falco" {} +
|
||||
done
|
||||
VIASHMAIN
|
||||
bash "$tempscript"
|
||||
'''
|
||||
|
||||
@@ -27,8 +27,8 @@
|
||||
"input_falco": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `file`, required. Directory to write falco output to",
|
||||
"help_text": "Type: `file`, required. Directory to write falco output to"
|
||||
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write falco output to",
|
||||
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write falco output to"
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -57,9 +57,6 @@ test_resources:
|
||||
is_executable: true
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -155,10 +152,10 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/io/untar"
|
||||
executable: "target/nextflow/io/untar/main.nf"
|
||||
viash_version: "0.9.1"
|
||||
git_commit: "dd1f93487f4e908999504e1fcdf97f6c59f743d9"
|
||||
git_remote: "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-16-gdd1f934"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "795abd68688f4f31b0587bc8e4a7de49b6c00825"
|
||||
git_remote: "https://github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-18-g795abd6"
|
||||
package_config:
|
||||
name: "demultiplex"
|
||||
version: "main"
|
||||
@@ -167,7 +164,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/demultiplex/v2/"
|
||||
dest: "testData"
|
||||
viash_version: "0.9.1"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// untar main
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.1 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required && arg.direction == "input") {
|
||||
if (arg.required) {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -192,8 +192,15 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -206,16 +213,6 @@ Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1669,162 +1666,6 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1882,6 +1723,8 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1894,7 +1737,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1906,17 +1749,33 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile]}
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1947,10 +1806,13 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1967,7 +1829,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value]]
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1998,9 +1860,13 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
return value_
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -2008,17 +1874,18 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [["key": plainName_, value: value_]]
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2692,8 +2559,7 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2850,36 +2716,12 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2892,95 +2734,19 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
|
||||
[join_id, channelId, id_, output_]
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
def stateToAddNoMultiple = state_to_add.findAll{k, v -> !multipleArgs.contains(k)}
|
||||
// First add non multiple arguments
|
||||
|
||||
def overlap = old_state.keySet().intersect(stateToAddNoMultiple.keySet())
|
||||
assert overlap.isEmpty() : "ID $id: multiple entries for " +
|
||||
" argument(s) $overlap were emitted."
|
||||
def return_state = old_state + stateToAddNoMultiple
|
||||
|
||||
// Add `multiple: true` arguments
|
||||
def stateToAddMultiple = state_to_add.findAll{k, v -> multipleArgs.contains(k)}
|
||||
stateToAddMultiple.each {k, v ->
|
||||
def currentKey = return_state.getOrDefault(k, [])
|
||||
def currentKeyList = currentKey instanceof List ? currentKey : [currentKey]
|
||||
currentKeyList.add(v)
|
||||
return_state[k] = currentKeyList
|
||||
}
|
||||
return return_state
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2989,21 +2755,23 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishStates = chNewState
|
||||
def chPublish = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -3114,10 +2882,6 @@ meta = [
|
||||
}
|
||||
],
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3232,10 +2996,10 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/io/untar",
|
||||
"viash_version" : "0.9.1",
|
||||
"git_commit" : "dd1f93487f4e908999504e1fcdf97f6c59f743d9",
|
||||
"git_remote" : "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex",
|
||||
"git_tag" : "v0.1.1-16-gdd1f934"
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "795abd68688f4f31b0587bc8e4a7de49b6c00825",
|
||||
"git_remote" : "https://github.com/viash-hub/demultiplex",
|
||||
"git_tag" : "v0.1.1-18-g795abd6"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "demultiplex",
|
||||
@@ -3249,7 +3013,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.1",
|
||||
"viash_version" : "0.9.0",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -103,9 +103,6 @@ resources:
|
||||
description: "Runner for demultiplexing of raw sequencing data"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -194,10 +191,10 @@ build_info:
|
||||
engine: "native|native"
|
||||
output: "target/nextflow/runner"
|
||||
executable: "target/nextflow/runner/main.nf"
|
||||
viash_version: "0.9.1"
|
||||
git_commit: "dd1f93487f4e908999504e1fcdf97f6c59f743d9"
|
||||
git_remote: "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-16-gdd1f934"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "795abd68688f4f31b0587bc8e4a7de49b6c00825"
|
||||
git_remote: "https://github.com/viash-hub/demultiplex"
|
||||
git_tag: "v0.1.1-18-g795abd6"
|
||||
dependencies:
|
||||
- "target/nextflow/demultiplex"
|
||||
- "target/nextflow/io/publish"
|
||||
@@ -209,7 +206,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/demultiplex/v2/"
|
||||
dest: "testData"
|
||||
viash_version: "0.9.1"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// runner main
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.1 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required && arg.direction == "input") {
|
||||
if (arg.required) {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -192,8 +192,15 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -206,16 +213,6 @@ Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1669,162 +1666,6 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1882,6 +1723,8 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1894,7 +1737,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1906,17 +1749,33 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile]}
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1947,10 +1806,13 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1967,7 +1829,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value]]
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1998,9 +1860,13 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
return value_
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -2008,17 +1874,18 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [["key": plainName_, value: value_]]
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2692,8 +2559,7 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2850,36 +2716,12 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2892,95 +2734,19 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
|
||||
[join_id, channelId, id_, output_]
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
def stateToAddNoMultiple = state_to_add.findAll{k, v -> !multipleArgs.contains(k)}
|
||||
// First add non multiple arguments
|
||||
|
||||
def overlap = old_state.keySet().intersect(stateToAddNoMultiple.keySet())
|
||||
assert overlap.isEmpty() : "ID $id: multiple entries for " +
|
||||
" argument(s) $overlap were emitted."
|
||||
def return_state = old_state + stateToAddNoMultiple
|
||||
|
||||
// Add `multiple: true` arguments
|
||||
def stateToAddMultiple = state_to_add.findAll{k, v -> multipleArgs.contains(k)}
|
||||
stateToAddMultiple.each {k, v ->
|
||||
def currentKey = return_state.getOrDefault(k, [])
|
||||
def currentKeyList = currentKey instanceof List ? currentKey : [currentKey]
|
||||
currentKeyList.add(v)
|
||||
return_state[k] = currentKeyList
|
||||
}
|
||||
return return_state
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2989,21 +2755,23 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishStates = chNewState
|
||||
def chPublish = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -3161,10 +2929,6 @@ meta = [
|
||||
],
|
||||
"description" : "Runner for demultiplexing of raw sequencing data",
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3275,10 +3039,10 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "native|native",
|
||||
"output" : "target/nextflow/runner",
|
||||
"viash_version" : "0.9.1",
|
||||
"git_commit" : "dd1f93487f4e908999504e1fcdf97f6c59f743d9",
|
||||
"git_remote" : "https://x-access-token:ghs_NbivUxJIyO1sd735kE5b6eTS6tHmjH0IsDUF@github.com/viash-hub/demultiplex",
|
||||
"git_tag" : "v0.1.1-16-gdd1f934"
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "795abd68688f4f31b0587bc8e4a7de49b6c00825",
|
||||
"git_remote" : "https://github.com/viash-hub/demultiplex",
|
||||
"git_tag" : "v0.1.1-18-g795abd6"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "demultiplex",
|
||||
@@ -3292,7 +3056,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.1",
|
||||
"viash_version" : "0.9.0",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
@@ -3351,9 +3115,9 @@ workflow run_wf {
|
||||
"run_information": state.run_information,
|
||||
"demultiplexer": state.demultiplexer,
|
||||
"skip_copycomplete_check": state.skip_copycomplete_check,
|
||||
"output": "fastq",
|
||||
"output_falco": "qc/fastqc",
|
||||
"output_multiqc": "qc/multiqc_report.html",
|
||||
"output": "$id/fastq",
|
||||
"output_falco": "$id/qc/fastqc",
|
||||
"output_multiqc": "$id/qc/multiqc_report.html",
|
||||
]
|
||||
if (state.run_information) {
|
||||
state_to_pass += ["output_run_information": state.run_information.getName()]
|
||||
|
||||
Reference in New Issue
Block a user